R/ffdata_download.R

Defines functions threeway_download bivariate_download industry_download usresearch_download ffdata_download

Documented in bivariate_download ffdata_download industry_download threeway_download usresearch_download

#' @title The ffdata_download function
#'
#' @description This function automatically downloads user-specified portfolio returns from
#' the Kenneth R. French Data Library.
#' The downloaded returns are saved as a .csv-file in the chosen directory.
#'
#' @param dir a character string, the directory for saving the data.
#' The current working directory is set by default.
#' @param type a character string, the portfolio type.
#' Possible values are "USResearch" (default) for the Fama-French 3 or 5 Factors,
#' "Industry" for the Industry portfolios, "Bivariate" for the Bivariate sorts on Size (ME),
#' Book-to-Market (BE), Operational Profitability (OP) and Investment (INV),
#' "Threeway" for the ME, BE, OP, and INV.
#' @param subtype a character string, the portfolio subtype.
#' Possible values are "ME_BE", "ME_OP", "ME_INV", "BEME_OP", "BEME_INV", "OP_INV" (for the Bivariate type) and
#' "ME_BEME_OP", "ME_BEME_INV", "ME_OP_INV" (for the Threeway type).
#' @param number_factors an integer, the number of factors for the portfolios.
#' Possible values are 3 (default) or 5 (for the USResearch portfolios);
#' 5 (default), 10, 12, 17, 30, 38, 48 or 49 (for the Industry portfolios) and
#' 6 (default), 25 or 100 (for the Bivariate portfolios).
#' @param freq a character string, the frequency of returns.
#' Possible values are "m" for monthly (default) or "d" for daily.
#' @param dividends a logical, TRUE (default) considers dividends. FALSE downloads the data without dividends.
#' @param start a character string, start date for the download in the format "YYmm". Default value is 197501.
#' @param end a character string, end date for the download in the format "YYmm".
#' Default value is two months before Sys.Date() to insure availability.
#' @param clean_na a logical, TRUE (default) replaces NAs with zoo::na.locf(). If FALSE, NAs are not cleaned. # nolint: line_length_linter.
#' @return a .csv-file within the directory, defined in dir.
#'
#' @examples
#' \dontrun{
#' ffdata_download()
#' ffdata_download(freq = "m", type = "Bivariate", subtype = "ME_BE", number_factors = 25)
#' }
#' @export ffdata_download
#' @import utils
ffdata_download <- function(
    dir = NULL,
    type = "USResearch",
    subtype = NULL,
    number_factors = NULL,
    freq = "m",
    dividends = TRUE,
    start = NULL,
    end = NULL,
    clean_na = TRUE) {
  base <- "http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/"

  if (is.null(start)) {
    start <- "197501"
  }
  if (is.null(end)) {
    end <- format(zoo::as.yearmon(Sys.Date()) - .2, "%Y%m")
  }
  if (is.null(dir)) {
    dir <- getwd()
  }

  try(if (freq == "d" & dividends == TRUE) {
    stop("Please, set dividends TRUE only for a monthly frequency.")
  })
  try(if (freq == "d" & type == "Threeway") {
    stop("Please, set the Threeway portfolio type only for a monthly frequency.")
  })

  sub_path <- paste(type, freq, start, end, sep = "_")
  dir.create(sub_path, showWarnings = FALSE)

  if (type == "USResearch") {
    if (is.null(number_factors)) {
      number_factors <- 3
    }
  } else if (type == "Industry") {
    if (is.null(number_factors)) {
      number_factors <- 5
    }
  } else if (type == "Bivariate") {
    if (is.null(subtype)) {
      subtype <- "ME_BE"
    }
    if (is.null(number_factors)) {
      number_factors <- 6
    }
  } else if (type == "Threeway") {
    if (is.null(subtype)) {
      subtype <- "ME_BEME_OP"
    }
  }

  factors_usr_all <- c(3, 5)
  factors_ind_all <- c(5, 10, 12, 17, 30, 38, 48, 49)
  factors_bi_all <- c(6, 25, 100)
  subtypes_bivar_all <-
    c("ME_BE", "ME_OP", "ME_INV", "BEME_OP", "BEME_INV", "OP_INV")
  subtypes_threeway_all <-
    c("ME_BEME_OP", "ME_BEME_INV", "ME_OP_INV")

  if (type == "USResearch") {
    if (is.na(match(number_factors, factors_usr_all))) {
      stop(
        "Please, set the argument number_factors to one of the following for the USResearch Dataset: 3, 5."
      )
    } else {
      usresearch_download(
        base,
        dir,
        sub_path,
        number_factors,
        freq,
        start,
        end,
        clean_na
      )
    }
  } else if (type == "Industry") {
    if (is.na(match(number_factors, factors_ind_all))) {
      stop(
        "Please, set the argument number_factors to one of the following for the Industry Dataset:
        5, 10, 12, 17, 30, 38, 48, 49."
      )
    } else {
      industry_download(
        base,
        dir,
        sub_path,
        number_factors,
        freq,
        dividends,
        start,
        end,
        clean_na
      )
    }
  } else if (type == "Bivariate") {
    if (is.na(match(subtype, subtypes_bivar_all))) {
      stop(
        "Please, set the argument subtype to one of the following for the Bivariate Dataset:
        ME_BE, ME_OP, ME_INV, BEME_OP, BEME_INV, OP_INV."
      )
    } else if (is.na(match(number_factors, factors_bi_all))) {
      stop(
        "Please, set the argument number_factors to one of the following for the Bivariate Dataset: 6,25,100."
      )
    } else {
      if ((subtype == "BEME_OP" || subtype == "BEME_INV" || subtype == "OP_INV") && number_factors != 25) {
        stop(
          "Please, set the argument number_factors to 25 for the Bivariate BEME_OP, BEME_INV or OP_INV datasets."
        )
      }
      bivariate_download(
        base,
        dir,
        sub_path,
        number_factors,
        subtype,
        freq,
        dividends,
        start,
        end,
        clean_na
      )
    }
  } else if (type == "Threeway") {
    if (is.na(match(subtype, subtypes_threeway_all))) {
      stop(
        "Please, set the argument subtype to one of the following for the Threeway Dataset:
        ME_BEME_OP, ME_BEME_INV, ME_OP_INV."
      )
    } else {
      threeway_download(
        base,
        dir,
        sub_path,
        subtype,
        dividends,
        start,
        end,
        clean_na
      )
    }
  }

  setwd(dir)
}


#' @title The usresearch_download function
#'
#' @description This function automatically downloads US-Research portfolios returns
#' from the Kenneth R. French Data Library.
#' The downloaded returns are saved then as a .csv-file in the chosen directory.
#'
#' @param base a character string, the Kenneth R. French Data Library address.
#' @param dir a character string, the directory for saving the data.
#' The current working directory is set by default.
#' @param sub_path a character string, the folder subpath, created by ffDataDownload.
#' @param number_factors an integer, the number of factors for the USResearch portfolios.
#' Possible values are 3 (default) and 5.
#' @param freq a character string, the frequency of returns.
#' Possible values are "m" for monthly (default) or "d" for daily.
#' @param start a character string, start date for the download in the format "YYmm".
#' Default value is 197501.
#' @param end a character string, end date for the download in the format "YYmm".
#' Default value is two months before Sys.Date() to insure availability.
#' @param clean_na a logical, TRUE (default) replaces NAs with zoo::na.locf(). If FALSE, NAs are not cleaned.
#' @return  a .csv-file within the directory, defined in dir.
#' @export usresearch_download

usresearch_download <-
  function(base,
           dir,
           sub_path,
           number_factors,
           freq,
           start,
           end,
           clean_na) {
    setwd(sub_path)
    format <- "_CSV.zip"

    if (number_factors == 3) {
      factors <- "F-F_Research_Data_Factors"

      if (freq == "d") {
        factors <- paste(factors, "_daily", sep = "")
      } else {
        factors <- factors
      }
      full_url <- paste(base, factors, format, sep = "")

      temp <- tempfile()
      download.file(full_url, temp, quiet = TRUE)

      data <- unzip(temp)
      text <- readLines(data)
      skip_indx <- which(substr(text, 1, 2) == "19")[1] - 2
      data_rets <-
        read.csv(
          data,
          skip = skip_indx,
          header = TRUE,
          stringsAsFactors = FALSE
        )
      colnames(data_rets) <- c("Date", colnames(data_rets)[-1])
      begin_indx <- which(substr(data_rets[, 1], 1, 6) == start)[1]
      end_indx <- which(substr(data_rets[, 1], 1, 6) == end)[1]
      if (is.na(end_indx)) {
        final <-
          format(max(as.Date(paste(
            substr(data_rets[, 1], 1, 6), 1
          ), "%Y%m%d"), na.rm = TRUE), "%Y%m")
        print(
          paste(
            "The end point ",
            end,
            " is still not present in the data.
            The time series' end is set to automatically to the latest reported date ",
            final,
            ".",
            sep = ""
          )
        )
        end_indx <- which(substr(data_rets[, 1], 1, 6) == final)[1]
      }
      data_rets <- data_rets[begin_indx:end_indx, ]
      data_rets <- data_rets[order(data_rets[, 1]), ]
      dates <- data_rets[, 1]
      rets <- data_rets[, -1]
      rets <- apply(rets, 2, as.numeric)
      if (clean_na) {
        index_na <-
          apply(rets, 2, function(x) {
            which(ceiling(x) == -99 | ceiling(x) == -999)
          })
        if (length(index_na) != 0) {
          for (m in seq_len(ncol(rets))) {
            if (length(index_na[[m]]) != 0) {
              rets[index_na[[m]], m] <- NA
              rets[, m] <- zoo::na.locf(rets[, m], na.rm = FALSE)
            }
          }
        }
      }
      rets <- rets / 100
      data_rets <- data.frame(dates, rets)
      colnames(data_rets) <- c("Date", colnames(data_rets)[-1])
      files <- list.files()
      del_indx <- substr(files, nchar(files) - (3 - 1), nchar(files))
      index_del <- which(del_indx == "CSV")
      file.remove(files[index_del])
      write.csv(data_rets, paste0(factors, ".csv", sep = ""),
        row.names =
          FALSE
      )
    } else {
      factors <- "F-F_Research_Data_5_Factors_2x3"
      if (freq == "d") {
        factors <- paste(factors, "_daily", sep = "")
      } else {
        factors <- factors
      }
      full_url <- paste(base, factors, format, sep = "")

      ## download
      temp <- tempfile()
      download.file(full_url, temp, quiet = TRUE)

      ## open
      data <- unzip(temp)
      text <- readLines(data)
      skip_indx <- which(substr(text, 1, 2) == "19")[1] - 2
      data_rets <-
        read.csv(
          data,
          skip = skip_indx,
          header = TRUE,
          stringsAsFactors = FALSE
        )
      colnames(data_rets) <-
        c("Date", colnames(data_rets)[-1])
      begin_indx <- which(substr(data_rets[, 1], 1, 6) == start)[1]
      end_indx <- which(substr(data_rets[, 1], 1, 6) == end)[1]
      if (is.na(end_indx)) {
        final <-
          format(max(as.Date(paste(
            substr(data_rets[, 1], 1, 6), 1
          ), "%Y%m%d"), na.rm = TRUE), "%Y%m")
        print(
          paste(
            "The end point ",
            end,
            " is still not present in the data.
            The time series' end is set to automatically to the latest reported date ",
            final,
            ".",
            sep = ""
          )
        )
        end_indx <-
          which(substr(data_rets[, 1], 1, 6) == final)[1]
      }
      data_rets <- data_rets[begin_indx:end_indx, ]
      data_rets <- data_rets[order(data_rets[, 1]), ]
      dates <- data_rets[, 1]
      rets <- data_rets[, -1]
      rets <- apply(rets, 2, as.numeric)
      if (clean_na) {
        index_na <-
          apply(rets, 2, function(x) {
            which(ceiling(x) == -99 | ceiling(x) == -999)
          })
        if (length(index_na) != 0) {
          for (m in seq_len(ncol(rets))) {
            if (length(index_na[[m]]) != 0) {
              rets[index_na[[m]], m] <- NA
              rets[, m] <-
                zoo::na.locf(rets[, m], na.rm = FALSE)
            }
          }
        }
      }
      rets <- rets / 100
      data_rets <- data.frame(dates, rets)
      colnames(data_rets) <-
        c("Date", colnames(data_rets)[-1])
      files <- list.files()
      del_indx <-
        substr(files, nchar(files) - (3 - 1), nchar(files))
      index_del <- which(del_indx == "CSV")
      file.remove(files[index_del])
      write.csv(data_rets, paste0(factors, ".csv", sep = ""),
        row.names =
          FALSE
      )
    }

    setwd(dir)
  }

#' @title The industry_download function
#'
#' @description This function automatically downloads Industry portfolios returns
#' from the Kenneth R. French Data Library.
#' The downloaded returns are saved then as a .csv-file in the chosen directory.
#'
#' @param base a character string, the Kenneth R. French Data Library address.
#' @param dir a character string, the directory for saving the data. The current working directory is set by default.
#' @param sub_path a character string, the folder subpath, created by ffDataDownload.
#' @param number_factors an integer, the number of factors for the Industry portfolios.
#' Possible values are 5 (default), 10, 12, 17, 30, 38, 48 or 49.
#' @param freq a character string, the frequency of the returns, "m" for monthly (default) or "d" for daily.
#' @param dividends a logical, TRUE (default) considers dividends. FALSE downloads the data without dividends.
#' @param start a character string, start date for the download in the format "YYmm".
#' Default value is 197501.
#' @param end a character string, end date for the download in the format "YYmm".
#' Default value is two months before Sys.Date() to insure availability.
#' @param clean_na a logical, TRUE (default) replaces NAs with zoo::na.locf(). If FALSE, NAs are not cleaned.
#' @return  a .csv-file within the directory, defined in dir.
#' @export industry_download

industry_download <-
  function(base,
           dir,
           sub_path,
           number_factors,
           freq,
           dividends,
           start,
           end,
           clean_na) {
    setwd(sub_path)
    format <- "_CSV.zip"

    ## url
    factors <-
      paste(number_factors, "Industry_Portfolios", sep = "_")

    if (freq == "d") {
      factors <- paste(factors, "_daily", sep = "")
    } else {
      if (dividends) {
        factors <- factors
      } else {
        factors <- paste(factors, "_Wout_Div", sep = "")
      }
    }

    ## url
    full_url <- paste(base, factors, format, sep = "")

    ## download (main)
    temp <- tempfile()
    download.file(full_url, temp, quiet = TRUE)

    ## open
    data <- unzip(temp)
    text <- readLines(data)
    skip_indx <- which(substr(text, 1, 2) == "19")[1] - 2
    data_rets <-
      read.csv(
        data,
        skip = skip_indx,
        header = TRUE,
        stringsAsFactors = FALSE
      )
    colnames(data_rets) <- c("Date", colnames(data_rets)[-1])
    begin_indx <- which(substr(data_rets[, 1], 1, 6) == start)[1]
    end_indx <- which(substr(data_rets[, 1], 1, 6) == end)[1]
    if (is.na(end_indx)) {
      final <-
        format(max(as.Date(paste(
          substr(data_rets[, 1], 1, 6), 1
        ), "%Y%m%d"), na.rm = TRUE), "%Y%m")
      print(
        paste(
          "The end point ",
          end,
          " is still not present in the data.
          The time series' end is set to automatically to the latest reported date ",
          final,
          ".",
          sep = ""
        )
      )
      end_indx <- which(substr(data_rets[, 1], 1, 6) == final)[1]
    }
    data_rets <- data_rets[begin_indx:end_indx, ]
    data_rets <- data_rets[order(data_rets[, 1]), ]
    dates <- data_rets[, 1]
    rets <- data_rets[, -1]
    rets <- apply(rets, 2, as.numeric)
    if (clean_na) {
      index_na <-
        apply(rets, 2, function(x) {
          which(ceiling(x) == -99 | ceiling(x) == -999)
        })
      if (length(index_na) != 0) {
        for (m in seq_len(ncol(rets))) {
          if (length(index_na[[m]]) != 0) {
            rets[index_na[[m]], m] <- NA
            rets[, m] <- zoo::na.locf(rets[, m], na.rm = FALSE)
          }
        }
      }
    }
    rets <- rets / 100
    data_rets <- data.frame(dates, rets)
    colnames(data_rets) <- c("Date", colnames(data_rets)[-1])
    files <- list.files()
    del_indx <- substr(files, nchar(files) - (3 - 1), nchar(files))
    index_del <- which(del_indx == "CSV")
    file.remove(files[index_del])
    write.csv(data_rets, paste0(factors, ".csv", sep = ""),
      row.names =
        FALSE
    )

    setwd(dir)
  }


#' @title The bivariate_download function
#'
#' @description  This function automatically downloads Bivariate portfolios returns
#' from the Kenneth R. French Data Library.
#' The downloaded returns are saved then as a .csv-file in the chosen directory.
#'
#' @param base a character string, the Kenneth R. French Data Library address.
#' @param dir a character string, the directory for saving the data. The current working directory is set by default.
#' @param sub_path a character string, the folder subpath, created by ffDataDownload.
#' @param number_factors an integer, the number of factors for the Bivariate portfolios.
#' Possible values are 6 (default), 25 or 100.
#' @param subtype a character string, the Bivariate portfolio subtype.
#' Possible values are "ME_BE" (default), "ME_OP", "ME_INV", "BEME_OP", "BEME_INV", "OP_INV".
#' @param freq a character string, the frequency of the returns, "m" for monthly (default) or "d" for daily.
#' @param dividends a logical, TRUE (default) considers dividends. FALSE downloads the data without dividends.
#' @param start a character string, start date for the download in the format "YYmm". Default value is 197501.
#' @param end a character string, end date for the download in the format "YYmm".
#' Default value is two months before Sys.Date() to insure availability.
#' @param clean_na a logical, TRUE (default) replaces NAs with zoo::na.locf(). If FALSE, NAs are not cleaned.
#' @return  a .csv-file within the directory, defined in dir.
#' @export bivariate_download

bivariate_download <-
  function(base,
           dir,
           sub_path,
           number_factors,
           subtype,
           freq,
           dividends,
           start,
           end,
           clean_na) {
    setwd(sub_path)
    format <- "_CSV.zip"

    if (subtype == "ME_BE") {
      subtype <- ""
    } # according to FamaFrench webseite

    if (subtype == "") {
      if (number_factors == 6) {
        factors <- paste(number_factors, "Portfolios", "2x3", sep = "_")
      } else if (number_factors == 25) {
        factors <- paste(number_factors, "Portfolios", "5x5", sep = "_")
      } else {
        factors <- paste(number_factors, "Portfolios", "10x10", sep = "_")
      }
    } else {
      if (number_factors == 6) {
        factors <-
          paste(number_factors, "Portfolios", subtype, "2x3", sep = "_")
      } else if (number_factors == 25) {
        factors <-
          paste(number_factors, "Portfolios", subtype, "5x5", sep = "_")
      } else {
        factors <-
          paste(number_factors,
            "Portfolios",
            subtype,
            "10x10",
            sep = "_"
          )
      }
    }
    if (freq == "d") {
      factors <- paste(factors, "_daily", sep = "")
    } else {
      factors <- factors
      if (dividends) {
        factors <- factors
      } else {
        if ((subtype == "ME_OP" || subtype == "ME_INV") && number_factors == 100) {
          factors <-
            paste(number_factors,
              "Portfolios",
              "10x10",
              subtype,
              "Wout_Div",
              sep = "_"
            )
        } else {
          factors <- paste(factors, "_Wout_Div", sep = "")
        }
      }
    }

    ## url(s)
    full_url <- paste(base, factors, format, sep = "")

    ## download (main)
    temp <- tempfile()
    download.file(full_url, temp, quiet = TRUE)

    ## open
    data <- unzip(temp)
    text <- readLines(data)
    skip_indx <- which(substr(text, 1, 2) == "19")[1] - 2
    data_rets <-
      read.csv(
        data,
        skip = skip_indx,
        header = TRUE,
        stringsAsFactors = FALSE
      )
    colnames(data_rets) <- c("Date", colnames(data_rets)[-1])
    begin_indx <- which(substr(data_rets[, 1], 1, 6) == start)[1]
    end_indx <- which(substr(data_rets[, 1], 1, 6) == end)[1]
    if (is.na(end_indx)) {
      final <-
        format(max(as.Date(paste(
          substr(data_rets[, 1], 1, 6), 1
        ), "%Y%m%d"), na.rm = TRUE), "%Y%m")
      print(
        paste(
          "The end point ",
          end,
          " is still not present in the data.
          The time series' end is set to automatically to the latest reported date ",
          final,
          ".",
          sep = ""
        )
      )
      end_indx <- which(substr(data_rets[, 1], 1, 6) == final)[1]
    }
    data_rets <- data_rets[begin_indx:end_indx, ]
    data_rets <- data_rets[order(data_rets[, 1]), ]
    dates <- data_rets[, 1]
    rets <- data_rets[, -1]
    rets <- apply(rets, 2, as.numeric)
    if (clean_na) {
      index_na <-
        apply(rets, 2, function(x) {
          which(ceiling(x) == -99 | ceiling(x) == -999)
        })
      if (length(index_na) != 0) {
        for (m in seq_len(ncol(rets))) {
          if (length(index_na[[m]]) != 0) {
            rets[index_na[[m]], m] <- NA
            rets[, m] <- zoo::na.locf(rets[, m], na.rm = FALSE)
          }
        }
      }
    }

    rets <- rets / 100
    data_rets <- data.frame(dates, rets)
    colnames(data_rets) <- c("Date", colnames(data_rets)[-1])
    files <- list.files()
    del_indx <- substr(files, nchar(files) - (3 - 1), nchar(files))
    index_del <- which(del_indx == "CSV")
    file.remove(files[index_del])
    write.csv(data_rets, paste0(factors, ".csv", sep = ""),
      row.names =
        FALSE
    )

    setwd(dir)
  }

#' @title The threeway_download function
#'
#' @description This function automatically downloads Threeway portfolios returns
#' from the Kenneth R. French Data Library.
#' The downloaded returns are saved then as a .csv-file in the chosen directory.
#'
#' @param base a character string, the Kenneth R. French Data Library address.
#' @param dir a character string, the directory for saving the data. The current working directory is set by default.
#' @param sub_path a character string, the folder subpath, created by ffDataDownload.
#' @param subtype a character string, the Threeway portfolio subtype.
#' Possible values are "ME_BEME_OP" (default), "ME_BEME_INV" or "ME_OP_INV".
#' @param dividends a logical, TRUE (default) considers dividends. FALSE downloads the data without dividends.
#' @param start a character string, start date for the download in the format "YYmm". Default value is 197501.
#' @param end a character string, end date for the download in the format "YYmm".
#' Default value is two months before Sys.Date() to insure availability.
#' @param clean_na a logical, TRUE (default) replaces NAs with zoo::na.locf(). If FALSE, NAs are not cleaned.
#' @return  a .csv-file within the directory, defined in dir.
#' @export threeway_download

threeway_download <-
  function(base,
           dir,
           sub_path,
           subtype,
           dividends,
           start,
           end,
           clean_na) {
    setwd(sub_path)
    format <- "_CSV.zip"
    factors <- paste(32, "Portfolios", subtype, "2x4x4", sep = "_")
    if (dividends) {
      factors <- factors
    } else {
      factors <- paste(factors, "_Wout_Div", sep = "")
    }

    full_url <- paste(base, factors, format, sep = "")

    ## download (main)
    temp <- tempfile()
    download.file(full_url, temp, quiet = TRUE)

    ## open
    data <- unzip(temp)
    text <- readLines(data)
    skip_indx <- which(substr(text, 1, 2) == "19")[1] - 2
    data_rets <-
      read.csv(
        data,
        skip = skip_indx,
        header = TRUE,
        stringsAsFactors = FALSE
      )
    colnames(data_rets) <- c("Date", colnames(data_rets)[-1])
    begin_indx <- which(substr(data_rets[, 1], 1, 6) == start)[1]
    end_indx <- which(substr(data_rets[, 1], 1, 6) == end)[1]
    if (is.na(end_indx)) {
      final <-
        format(max(as.Date(paste(
          substr(data_rets[, 1], 1, 6), 1
        ), "%Y%m%d"), na.rm = TRUE), "%Y%m")
      print(
        paste(
          "The end point ",
          end,
          " is still not present in the data.
          The time series' end is set to automatically to the latest reported date ",
          final,
          ".",
          sep = ""
        )
      )
      end_indx <- which(substr(data_rets[, 1], 1, 6) == final)[1]
    }
    data_rets <- data_rets[begin_indx:end_indx, ]
    data_rets <- data_rets[order(data_rets[, 1]), ]
    dates <- data_rets[, 1]
    rets <- data_rets[, -1]
    rets <- apply(rets, 2, as.numeric)
    if (clean_na) {
      index_na <-
        apply(rets, 2, function(x) {
          which(ceiling(x) == -99 | ceiling(x) == -999)
        })
      if (length(index_na) != 0) {
        for (m in seq_len(ncol(rets))) {
          if (length(index_na[[m]]) != 0) {
            rets[index_na[[m]], m] <- NA
            rets[, m] <- zoo::na.locf(rets[, m], na.rm = FALSE)
          }
        }
      }
    }
    rets <- rets / 100
    data_rets <- data.frame(dates, rets)
    colnames(data_rets) <- c("Date", colnames(data_rets)[-1])
    files <- list.files()
    del_indx <- substr(files, nchar(files) - (3 - 1), nchar(files))
    index_del <- which(del_indx == "CSV")
    file.remove(files[index_del])
    write.csv(data_rets, paste0(factors, ".csv", sep = ""),
      row.names =
        FALSE
    )

    setwd(dir)
  }
antshi/ffData documentation built on June 13, 2025, 4:54 p.m.