#' @title The ffdata_download function
#'
#' @description This function automatically downloads user-specified portfolio returns from
#' the Kenneth R. French Data Library.
#' The downloaded returns are saved as a .csv-file in the chosen directory.
#'
#' @param dir a character string, the directory for saving the data.
#' The current working directory is set by default.
#' @param type a character string, the portfolio type.
#' Possible values are "USResearch" (default) for the Fama-French 3 or 5 Factors,
#' "Industry" for the Industry portfolios, "Bivariate" for the Bivariate sorts on Size (ME),
#' Book-to-Market (BE), Operational Profitability (OP) and Investment (INV),
#' "Threeway" for the ME, BE, OP, and INV.
#' @param subtype a character string, the portfolio subtype.
#' Possible values are "ME_BE", "ME_OP", "ME_INV", "BEME_OP", "BEME_INV", "OP_INV" (for the Bivariate type) and
#' "ME_BEME_OP", "ME_BEME_INV", "ME_OP_INV" (for the Threeway type).
#' @param number_factors an integer, the number of factors for the portfolios.
#' Possible values are 3 (default) or 5 (for the USResearch portfolios);
#' 5 (default), 10, 12, 17, 30, 38, 48 or 49 (for the Industry portfolios) and
#' 6 (default), 25 or 100 (for the Bivariate portfolios).
#' @param freq a character string, the frequency of returns.
#' Possible values are "m" for monthly (default) or "d" for daily.
#' @param dividends a logical, TRUE (default) considers dividends. FALSE downloads the data without dividends.
#' @param start a character string, start date for the download in the format "YYmm". Default value is 197501.
#' @param end a character string, end date for the download in the format "YYmm".
#' Default value is two months before Sys.Date() to insure availability.
#' @param clean_na a logical, TRUE (default) replaces NAs with zoo::na.locf(). If FALSE, NAs are not cleaned. # nolint: line_length_linter.
#' @return a .csv-file within the directory, defined in dir.
#'
#' @examples
#' \dontrun{
#' ffdata_download()
#' ffdata_download(freq = "m", type = "Bivariate", subtype = "ME_BE", number_factors = 25)
#' }
#' @export ffdata_download
#' @import utils
ffdata_download <- function(
dir = NULL,
type = "USResearch",
subtype = NULL,
number_factors = NULL,
freq = "m",
dividends = TRUE,
start = NULL,
end = NULL,
clean_na = TRUE) {
base <- "http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/"
if (is.null(start)) {
start <- "197501"
}
if (is.null(end)) {
end <- format(zoo::as.yearmon(Sys.Date()) - .2, "%Y%m")
}
if (is.null(dir)) {
dir <- getwd()
}
try(if (freq == "d" & dividends == TRUE) {
stop("Please, set dividends TRUE only for a monthly frequency.")
})
try(if (freq == "d" & type == "Threeway") {
stop("Please, set the Threeway portfolio type only for a monthly frequency.")
})
sub_path <- paste(type, freq, start, end, sep = "_")
dir.create(sub_path, showWarnings = FALSE)
if (type == "USResearch") {
if (is.null(number_factors)) {
number_factors <- 3
}
} else if (type == "Industry") {
if (is.null(number_factors)) {
number_factors <- 5
}
} else if (type == "Bivariate") {
if (is.null(subtype)) {
subtype <- "ME_BE"
}
if (is.null(number_factors)) {
number_factors <- 6
}
} else if (type == "Threeway") {
if (is.null(subtype)) {
subtype <- "ME_BEME_OP"
}
}
factors_usr_all <- c(3, 5)
factors_ind_all <- c(5, 10, 12, 17, 30, 38, 48, 49)
factors_bi_all <- c(6, 25, 100)
subtypes_bivar_all <-
c("ME_BE", "ME_OP", "ME_INV", "BEME_OP", "BEME_INV", "OP_INV")
subtypes_threeway_all <-
c("ME_BEME_OP", "ME_BEME_INV", "ME_OP_INV")
if (type == "USResearch") {
if (is.na(match(number_factors, factors_usr_all))) {
stop(
"Please, set the argument number_factors to one of the following for the USResearch Dataset: 3, 5."
)
} else {
usresearch_download(
base,
dir,
sub_path,
number_factors,
freq,
start,
end,
clean_na
)
}
} else if (type == "Industry") {
if (is.na(match(number_factors, factors_ind_all))) {
stop(
"Please, set the argument number_factors to one of the following for the Industry Dataset:
5, 10, 12, 17, 30, 38, 48, 49."
)
} else {
industry_download(
base,
dir,
sub_path,
number_factors,
freq,
dividends,
start,
end,
clean_na
)
}
} else if (type == "Bivariate") {
if (is.na(match(subtype, subtypes_bivar_all))) {
stop(
"Please, set the argument subtype to one of the following for the Bivariate Dataset:
ME_BE, ME_OP, ME_INV, BEME_OP, BEME_INV, OP_INV."
)
} else if (is.na(match(number_factors, factors_bi_all))) {
stop(
"Please, set the argument number_factors to one of the following for the Bivariate Dataset: 6,25,100."
)
} else {
if ((subtype == "BEME_OP" || subtype == "BEME_INV" || subtype == "OP_INV") && number_factors != 25) {
stop(
"Please, set the argument number_factors to 25 for the Bivariate BEME_OP, BEME_INV or OP_INV datasets."
)
}
bivariate_download(
base,
dir,
sub_path,
number_factors,
subtype,
freq,
dividends,
start,
end,
clean_na
)
}
} else if (type == "Threeway") {
if (is.na(match(subtype, subtypes_threeway_all))) {
stop(
"Please, set the argument subtype to one of the following for the Threeway Dataset:
ME_BEME_OP, ME_BEME_INV, ME_OP_INV."
)
} else {
threeway_download(
base,
dir,
sub_path,
subtype,
dividends,
start,
end,
clean_na
)
}
}
setwd(dir)
}
#' @title The usresearch_download function
#'
#' @description This function automatically downloads US-Research portfolios returns
#' from the Kenneth R. French Data Library.
#' The downloaded returns are saved then as a .csv-file in the chosen directory.
#'
#' @param base a character string, the Kenneth R. French Data Library address.
#' @param dir a character string, the directory for saving the data.
#' The current working directory is set by default.
#' @param sub_path a character string, the folder subpath, created by ffDataDownload.
#' @param number_factors an integer, the number of factors for the USResearch portfolios.
#' Possible values are 3 (default) and 5.
#' @param freq a character string, the frequency of returns.
#' Possible values are "m" for monthly (default) or "d" for daily.
#' @param start a character string, start date for the download in the format "YYmm".
#' Default value is 197501.
#' @param end a character string, end date for the download in the format "YYmm".
#' Default value is two months before Sys.Date() to insure availability.
#' @param clean_na a logical, TRUE (default) replaces NAs with zoo::na.locf(). If FALSE, NAs are not cleaned.
#' @return a .csv-file within the directory, defined in dir.
#' @export usresearch_download
usresearch_download <-
function(base,
dir,
sub_path,
number_factors,
freq,
start,
end,
clean_na) {
setwd(sub_path)
format <- "_CSV.zip"
if (number_factors == 3) {
factors <- "F-F_Research_Data_Factors"
if (freq == "d") {
factors <- paste(factors, "_daily", sep = "")
} else {
factors <- factors
}
full_url <- paste(base, factors, format, sep = "")
temp <- tempfile()
download.file(full_url, temp, quiet = TRUE)
data <- unzip(temp)
text <- readLines(data)
skip_indx <- which(substr(text, 1, 2) == "19")[1] - 2
data_rets <-
read.csv(
data,
skip = skip_indx,
header = TRUE,
stringsAsFactors = FALSE
)
colnames(data_rets) <- c("Date", colnames(data_rets)[-1])
begin_indx <- which(substr(data_rets[, 1], 1, 6) == start)[1]
end_indx <- which(substr(data_rets[, 1], 1, 6) == end)[1]
if (is.na(end_indx)) {
final <-
format(max(as.Date(paste(
substr(data_rets[, 1], 1, 6), 1
), "%Y%m%d"), na.rm = TRUE), "%Y%m")
print(
paste(
"The end point ",
end,
" is still not present in the data.
The time series' end is set to automatically to the latest reported date ",
final,
".",
sep = ""
)
)
end_indx <- which(substr(data_rets[, 1], 1, 6) == final)[1]
}
data_rets <- data_rets[begin_indx:end_indx, ]
data_rets <- data_rets[order(data_rets[, 1]), ]
dates <- data_rets[, 1]
rets <- data_rets[, -1]
rets <- apply(rets, 2, as.numeric)
if (clean_na) {
index_na <-
apply(rets, 2, function(x) {
which(ceiling(x) == -99 | ceiling(x) == -999)
})
if (length(index_na) != 0) {
for (m in seq_len(ncol(rets))) {
if (length(index_na[[m]]) != 0) {
rets[index_na[[m]], m] <- NA
rets[, m] <- zoo::na.locf(rets[, m], na.rm = FALSE)
}
}
}
}
rets <- rets / 100
data_rets <- data.frame(dates, rets)
colnames(data_rets) <- c("Date", colnames(data_rets)[-1])
files <- list.files()
del_indx <- substr(files, nchar(files) - (3 - 1), nchar(files))
index_del <- which(del_indx == "CSV")
file.remove(files[index_del])
write.csv(data_rets, paste0(factors, ".csv", sep = ""),
row.names =
FALSE
)
} else {
factors <- "F-F_Research_Data_5_Factors_2x3"
if (freq == "d") {
factors <- paste(factors, "_daily", sep = "")
} else {
factors <- factors
}
full_url <- paste(base, factors, format, sep = "")
## download
temp <- tempfile()
download.file(full_url, temp, quiet = TRUE)
## open
data <- unzip(temp)
text <- readLines(data)
skip_indx <- which(substr(text, 1, 2) == "19")[1] - 2
data_rets <-
read.csv(
data,
skip = skip_indx,
header = TRUE,
stringsAsFactors = FALSE
)
colnames(data_rets) <-
c("Date", colnames(data_rets)[-1])
begin_indx <- which(substr(data_rets[, 1], 1, 6) == start)[1]
end_indx <- which(substr(data_rets[, 1], 1, 6) == end)[1]
if (is.na(end_indx)) {
final <-
format(max(as.Date(paste(
substr(data_rets[, 1], 1, 6), 1
), "%Y%m%d"), na.rm = TRUE), "%Y%m")
print(
paste(
"The end point ",
end,
" is still not present in the data.
The time series' end is set to automatically to the latest reported date ",
final,
".",
sep = ""
)
)
end_indx <-
which(substr(data_rets[, 1], 1, 6) == final)[1]
}
data_rets <- data_rets[begin_indx:end_indx, ]
data_rets <- data_rets[order(data_rets[, 1]), ]
dates <- data_rets[, 1]
rets <- data_rets[, -1]
rets <- apply(rets, 2, as.numeric)
if (clean_na) {
index_na <-
apply(rets, 2, function(x) {
which(ceiling(x) == -99 | ceiling(x) == -999)
})
if (length(index_na) != 0) {
for (m in seq_len(ncol(rets))) {
if (length(index_na[[m]]) != 0) {
rets[index_na[[m]], m] <- NA
rets[, m] <-
zoo::na.locf(rets[, m], na.rm = FALSE)
}
}
}
}
rets <- rets / 100
data_rets <- data.frame(dates, rets)
colnames(data_rets) <-
c("Date", colnames(data_rets)[-1])
files <- list.files()
del_indx <-
substr(files, nchar(files) - (3 - 1), nchar(files))
index_del <- which(del_indx == "CSV")
file.remove(files[index_del])
write.csv(data_rets, paste0(factors, ".csv", sep = ""),
row.names =
FALSE
)
}
setwd(dir)
}
#' @title The industry_download function
#'
#' @description This function automatically downloads Industry portfolios returns
#' from the Kenneth R. French Data Library.
#' The downloaded returns are saved then as a .csv-file in the chosen directory.
#'
#' @param base a character string, the Kenneth R. French Data Library address.
#' @param dir a character string, the directory for saving the data. The current working directory is set by default.
#' @param sub_path a character string, the folder subpath, created by ffDataDownload.
#' @param number_factors an integer, the number of factors for the Industry portfolios.
#' Possible values are 5 (default), 10, 12, 17, 30, 38, 48 or 49.
#' @param freq a character string, the frequency of the returns, "m" for monthly (default) or "d" for daily.
#' @param dividends a logical, TRUE (default) considers dividends. FALSE downloads the data without dividends.
#' @param start a character string, start date for the download in the format "YYmm".
#' Default value is 197501.
#' @param end a character string, end date for the download in the format "YYmm".
#' Default value is two months before Sys.Date() to insure availability.
#' @param clean_na a logical, TRUE (default) replaces NAs with zoo::na.locf(). If FALSE, NAs are not cleaned.
#' @return a .csv-file within the directory, defined in dir.
#' @export industry_download
industry_download <-
function(base,
dir,
sub_path,
number_factors,
freq,
dividends,
start,
end,
clean_na) {
setwd(sub_path)
format <- "_CSV.zip"
## url
factors <-
paste(number_factors, "Industry_Portfolios", sep = "_")
if (freq == "d") {
factors <- paste(factors, "_daily", sep = "")
} else {
if (dividends) {
factors <- factors
} else {
factors <- paste(factors, "_Wout_Div", sep = "")
}
}
## url
full_url <- paste(base, factors, format, sep = "")
## download (main)
temp <- tempfile()
download.file(full_url, temp, quiet = TRUE)
## open
data <- unzip(temp)
text <- readLines(data)
skip_indx <- which(substr(text, 1, 2) == "19")[1] - 2
data_rets <-
read.csv(
data,
skip = skip_indx,
header = TRUE,
stringsAsFactors = FALSE
)
colnames(data_rets) <- c("Date", colnames(data_rets)[-1])
begin_indx <- which(substr(data_rets[, 1], 1, 6) == start)[1]
end_indx <- which(substr(data_rets[, 1], 1, 6) == end)[1]
if (is.na(end_indx)) {
final <-
format(max(as.Date(paste(
substr(data_rets[, 1], 1, 6), 1
), "%Y%m%d"), na.rm = TRUE), "%Y%m")
print(
paste(
"The end point ",
end,
" is still not present in the data.
The time series' end is set to automatically to the latest reported date ",
final,
".",
sep = ""
)
)
end_indx <- which(substr(data_rets[, 1], 1, 6) == final)[1]
}
data_rets <- data_rets[begin_indx:end_indx, ]
data_rets <- data_rets[order(data_rets[, 1]), ]
dates <- data_rets[, 1]
rets <- data_rets[, -1]
rets <- apply(rets, 2, as.numeric)
if (clean_na) {
index_na <-
apply(rets, 2, function(x) {
which(ceiling(x) == -99 | ceiling(x) == -999)
})
if (length(index_na) != 0) {
for (m in seq_len(ncol(rets))) {
if (length(index_na[[m]]) != 0) {
rets[index_na[[m]], m] <- NA
rets[, m] <- zoo::na.locf(rets[, m], na.rm = FALSE)
}
}
}
}
rets <- rets / 100
data_rets <- data.frame(dates, rets)
colnames(data_rets) <- c("Date", colnames(data_rets)[-1])
files <- list.files()
del_indx <- substr(files, nchar(files) - (3 - 1), nchar(files))
index_del <- which(del_indx == "CSV")
file.remove(files[index_del])
write.csv(data_rets, paste0(factors, ".csv", sep = ""),
row.names =
FALSE
)
setwd(dir)
}
#' @title The bivariate_download function
#'
#' @description This function automatically downloads Bivariate portfolios returns
#' from the Kenneth R. French Data Library.
#' The downloaded returns are saved then as a .csv-file in the chosen directory.
#'
#' @param base a character string, the Kenneth R. French Data Library address.
#' @param dir a character string, the directory for saving the data. The current working directory is set by default.
#' @param sub_path a character string, the folder subpath, created by ffDataDownload.
#' @param number_factors an integer, the number of factors for the Bivariate portfolios.
#' Possible values are 6 (default), 25 or 100.
#' @param subtype a character string, the Bivariate portfolio subtype.
#' Possible values are "ME_BE" (default), "ME_OP", "ME_INV", "BEME_OP", "BEME_INV", "OP_INV".
#' @param freq a character string, the frequency of the returns, "m" for monthly (default) or "d" for daily.
#' @param dividends a logical, TRUE (default) considers dividends. FALSE downloads the data without dividends.
#' @param start a character string, start date for the download in the format "YYmm". Default value is 197501.
#' @param end a character string, end date for the download in the format "YYmm".
#' Default value is two months before Sys.Date() to insure availability.
#' @param clean_na a logical, TRUE (default) replaces NAs with zoo::na.locf(). If FALSE, NAs are not cleaned.
#' @return a .csv-file within the directory, defined in dir.
#' @export bivariate_download
bivariate_download <-
function(base,
dir,
sub_path,
number_factors,
subtype,
freq,
dividends,
start,
end,
clean_na) {
setwd(sub_path)
format <- "_CSV.zip"
if (subtype == "ME_BE") {
subtype <- ""
} # according to FamaFrench webseite
if (subtype == "") {
if (number_factors == 6) {
factors <- paste(number_factors, "Portfolios", "2x3", sep = "_")
} else if (number_factors == 25) {
factors <- paste(number_factors, "Portfolios", "5x5", sep = "_")
} else {
factors <- paste(number_factors, "Portfolios", "10x10", sep = "_")
}
} else {
if (number_factors == 6) {
factors <-
paste(number_factors, "Portfolios", subtype, "2x3", sep = "_")
} else if (number_factors == 25) {
factors <-
paste(number_factors, "Portfolios", subtype, "5x5", sep = "_")
} else {
factors <-
paste(number_factors,
"Portfolios",
subtype,
"10x10",
sep = "_"
)
}
}
if (freq == "d") {
factors <- paste(factors, "_daily", sep = "")
} else {
factors <- factors
if (dividends) {
factors <- factors
} else {
if ((subtype == "ME_OP" || subtype == "ME_INV") && number_factors == 100) {
factors <-
paste(number_factors,
"Portfolios",
"10x10",
subtype,
"Wout_Div",
sep = "_"
)
} else {
factors <- paste(factors, "_Wout_Div", sep = "")
}
}
}
## url(s)
full_url <- paste(base, factors, format, sep = "")
## download (main)
temp <- tempfile()
download.file(full_url, temp, quiet = TRUE)
## open
data <- unzip(temp)
text <- readLines(data)
skip_indx <- which(substr(text, 1, 2) == "19")[1] - 2
data_rets <-
read.csv(
data,
skip = skip_indx,
header = TRUE,
stringsAsFactors = FALSE
)
colnames(data_rets) <- c("Date", colnames(data_rets)[-1])
begin_indx <- which(substr(data_rets[, 1], 1, 6) == start)[1]
end_indx <- which(substr(data_rets[, 1], 1, 6) == end)[1]
if (is.na(end_indx)) {
final <-
format(max(as.Date(paste(
substr(data_rets[, 1], 1, 6), 1
), "%Y%m%d"), na.rm = TRUE), "%Y%m")
print(
paste(
"The end point ",
end,
" is still not present in the data.
The time series' end is set to automatically to the latest reported date ",
final,
".",
sep = ""
)
)
end_indx <- which(substr(data_rets[, 1], 1, 6) == final)[1]
}
data_rets <- data_rets[begin_indx:end_indx, ]
data_rets <- data_rets[order(data_rets[, 1]), ]
dates <- data_rets[, 1]
rets <- data_rets[, -1]
rets <- apply(rets, 2, as.numeric)
if (clean_na) {
index_na <-
apply(rets, 2, function(x) {
which(ceiling(x) == -99 | ceiling(x) == -999)
})
if (length(index_na) != 0) {
for (m in seq_len(ncol(rets))) {
if (length(index_na[[m]]) != 0) {
rets[index_na[[m]], m] <- NA
rets[, m] <- zoo::na.locf(rets[, m], na.rm = FALSE)
}
}
}
}
rets <- rets / 100
data_rets <- data.frame(dates, rets)
colnames(data_rets) <- c("Date", colnames(data_rets)[-1])
files <- list.files()
del_indx <- substr(files, nchar(files) - (3 - 1), nchar(files))
index_del <- which(del_indx == "CSV")
file.remove(files[index_del])
write.csv(data_rets, paste0(factors, ".csv", sep = ""),
row.names =
FALSE
)
setwd(dir)
}
#' @title The threeway_download function
#'
#' @description This function automatically downloads Threeway portfolios returns
#' from the Kenneth R. French Data Library.
#' The downloaded returns are saved then as a .csv-file in the chosen directory.
#'
#' @param base a character string, the Kenneth R. French Data Library address.
#' @param dir a character string, the directory for saving the data. The current working directory is set by default.
#' @param sub_path a character string, the folder subpath, created by ffDataDownload.
#' @param subtype a character string, the Threeway portfolio subtype.
#' Possible values are "ME_BEME_OP" (default), "ME_BEME_INV" or "ME_OP_INV".
#' @param dividends a logical, TRUE (default) considers dividends. FALSE downloads the data without dividends.
#' @param start a character string, start date for the download in the format "YYmm". Default value is 197501.
#' @param end a character string, end date for the download in the format "YYmm".
#' Default value is two months before Sys.Date() to insure availability.
#' @param clean_na a logical, TRUE (default) replaces NAs with zoo::na.locf(). If FALSE, NAs are not cleaned.
#' @return a .csv-file within the directory, defined in dir.
#' @export threeway_download
threeway_download <-
function(base,
dir,
sub_path,
subtype,
dividends,
start,
end,
clean_na) {
setwd(sub_path)
format <- "_CSV.zip"
factors <- paste(32, "Portfolios", subtype, "2x4x4", sep = "_")
if (dividends) {
factors <- factors
} else {
factors <- paste(factors, "_Wout_Div", sep = "")
}
full_url <- paste(base, factors, format, sep = "")
## download (main)
temp <- tempfile()
download.file(full_url, temp, quiet = TRUE)
## open
data <- unzip(temp)
text <- readLines(data)
skip_indx <- which(substr(text, 1, 2) == "19")[1] - 2
data_rets <-
read.csv(
data,
skip = skip_indx,
header = TRUE,
stringsAsFactors = FALSE
)
colnames(data_rets) <- c("Date", colnames(data_rets)[-1])
begin_indx <- which(substr(data_rets[, 1], 1, 6) == start)[1]
end_indx <- which(substr(data_rets[, 1], 1, 6) == end)[1]
if (is.na(end_indx)) {
final <-
format(max(as.Date(paste(
substr(data_rets[, 1], 1, 6), 1
), "%Y%m%d"), na.rm = TRUE), "%Y%m")
print(
paste(
"The end point ",
end,
" is still not present in the data.
The time series' end is set to automatically to the latest reported date ",
final,
".",
sep = ""
)
)
end_indx <- which(substr(data_rets[, 1], 1, 6) == final)[1]
}
data_rets <- data_rets[begin_indx:end_indx, ]
data_rets <- data_rets[order(data_rets[, 1]), ]
dates <- data_rets[, 1]
rets <- data_rets[, -1]
rets <- apply(rets, 2, as.numeric)
if (clean_na) {
index_na <-
apply(rets, 2, function(x) {
which(ceiling(x) == -99 | ceiling(x) == -999)
})
if (length(index_na) != 0) {
for (m in seq_len(ncol(rets))) {
if (length(index_na[[m]]) != 0) {
rets[index_na[[m]], m] <- NA
rets[, m] <- zoo::na.locf(rets[, m], na.rm = FALSE)
}
}
}
}
rets <- rets / 100
data_rets <- data.frame(dates, rets)
colnames(data_rets) <- c("Date", colnames(data_rets)[-1])
files <- list.files()
del_indx <- substr(files, nchar(files) - (3 - 1), nchar(files))
index_del <- which(del_indx == "CSV")
file.remove(files[index_del])
write.csv(data_rets, paste0(factors, ".csv", sep = ""),
row.names =
FALSE
)
setwd(dir)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.