Nothing
#' @name mf_download_data
#' @aliases mf_download_data
#' @title Download several datasets given their URLs and destination path
#' @description This function enables to download datasets. In a data import workflow, this function is typically used after a call to the \link{mf_get_url} function. The output value of \link{mf_get_url} can be used as input of parameter \code{df_to_dl} of \link{mf_download_data}.
#'
#' The download can be parallelized.
#'
#' @inheritParams mf_get_url
#' @inheritParams mf_login
#' @param df_to_dl data.frame. Urls and destination files of dataset to download. Typically output of \link{mf_get_url}. See Details for the structure
#' @param path string. Target folder for the data to download. Default : temporary folder.
#' @param parallel boolean. Parallelize the download ? Default to FALSE
#' @param num_workers integer. Number of workers in case of parallel download. Default to number of workers available in the machine minus one.
#' @param min_filesize integer. Minimum file size expected (in bytes) for one file downloaded. If files downloaded are smaller than this value, the files will be downloaded again. Default 5000.
#'
#' @return a data.frame with the same structure of the input data.frame \code{df_to_dl} + columns providing details of the data downloaded. The additional columns are :
#' \describe{
#' \item{fileDl}{Boolean (dataset downloaded or failure)}
#' \item{dlStatus}{Download status : 1 = download ok ; 2 = download error ; 3 = dataset was already existing in destination file }
#' \item{fileSize}{File size on disk}
#' }
#'
#' @details
#'
#' Parameter \code{df_to_dl} must be a data.frame with the following minimal structure :
#' \describe{
#' \item{id_roi}{An id for the ROI (character string)}
#' \item{collection}{Collection (character string)}
#' \item{name}{}
#' \item{url}{URL of the file to download (character string)}
#' }
#'
#' @import dplyr parallel httr
#' @importFrom utils write.csv URLdecode
#' @export
#'
#' @examples
#'
#' \dontrun{
#'
#' ### Login to EOSDIS Earthdata with your username and password
#' log <- mf_login(credentials = c("earthdata_un","earthdata_pw"))
#'
#' ### Set-up parameters of interest
#' coll <- "MOD11A1.061"
#'
#' bands <- c("LST_Day_1km","LST_Night_1km")
#'
#' time_range <- as.Date(c("2017-01-01","2017-01-30"))
#'
#' roi <- sf::st_as_sf(data.frame(
#' id = "roi_test",
#' geom="POLYGON ((-5.82 9.54, -5.42 9.55, -5.41 8.84, -5.81 8.84, -5.82 9.54))"),
#' wkt="geom",crs = 4326)
#'
#' ### Get the URLs of the data
#' (urls_mod11a1 <- mf_get_url(
#' collection = coll,
#' variables = bands,
#' roi = roi,
#' time_range = time_range
#' ))
#'
#' ### Download the data
#' res_dl <- mf_download_data(urls_mod11a1)
#'
#' ### Import the data as terra::SpatRast
#' modis_ts <- mf_import_data(dirname(res_dl$destfile[1]), collection = coll)
#'
#' ### Plot the data
#' terra::plot(modis_ts)
#'
#'}
mf_download_data <- function(df_to_dl,
                             path = tempfile("modisfast_"),
                             parallel = FALSE,
                             num_workers = parallel::detectCores() - 1,
                             credentials = NULL,
                             verbose = TRUE,
                             min_filesize = 5000) {
  # Column names used through dplyr's non-standard evaluation; binding them
  # here keeps R CMD check from reporting undefined global variables.
  fileSize <- destfile <- fileDl <- NULL

  # Upper bound on the sequential re-download passes. Without a bound, a file
  # that is legitimately smaller than `min_filesize` (or a permanently failing
  # URL) would be retried forever.
  max_retries <- 3L

  # ---- Argument checks -----------------------------------------------------
  if (!inherits(verbose, "logical")) {
    stop("verbose argument must be boolean\n")
  }
  if (!inherits(parallel, "logical")) {
    stop("parallel argument must be boolean\n")
  }
  if (!inherits(df_to_dl, "data.frame")) {
    stop("df_to_dl argument must be a data.frame\n")
  }
  required_cols <- c("url", "collection", "name", "id_roi")
  if (!all(required_cols %in% colnames(df_to_dl))) {
    stop("df_to_dl argument must be a data.frame with at least 4 columns named 'url', 'collection', 'name', and 'id_roi' \n")
  }
  # detectCores() may return NA; isTRUE() keeps that from breaking the `if`.
  if (isTRUE(num_workers > parallel::detectCores())) {
    stop("the number of workers that you set is greater than the number of available workers in your machine\n")
  }
  .testInternetConnection()

  # ---- Helpers ---------------------------------------------------------------
  # TRUE for files that exist on disk and hold at least `min_filesize` bytes
  # (file.size() returns NA for missing files, which maps to FALSE).
  is_complete <- function(files) {
    sizes <- file.size(files)
    !is.na(sizes) & sizes >= min_filesize
  }

  # Download one URL to `output`: a first request resolves the final URL, the
  # second one authenticates and streams the response to disk.
  dl_func <- function(url, output, username, password) {
    u <- httr::GET(url)
    httr::GET(
      u$url,
      httr::authenticate(username, password),
      httr::write_disk(output),
      httr::progress(),
      config = list(maxredirs = -1)
    )
  }

  # ---- Work out what is already on disk --------------------------------------
  df_to_dl$destfile <- file.path(
    path, "data", df_to_dl$id_roi, df_to_dl$collection, df_to_dl$name
  )

  data_dl <- df_to_dl %>%
    dplyr::mutate(fileDl = file.exists(destfile)) %>%
    dplyr::mutate(fileSize = file.size(destfile)) %>%
    dplyr::mutate(fileDl = fileDl & !is.na(fileSize) & fileSize >= min_filesize) %>%
    dplyr::mutate(dlStatus = ifelse(fileDl, 3, NA))

  # Remove leftovers that are too small to be valid. The comparison is strict
  # so that it mirrors the `>= min_filesize` acceptance rule above: a file of
  # exactly `min_filesize` bytes is kept, not deleted.
  undersized <- data_dl$destfile[
    !is.na(data_dl$fileSize) & data_dl$fileSize < min_filesize
  ]
  file.remove(undersized)

  data_already_exist <- dplyr::filter(data_dl, fileDl == TRUE)
  data_to_download <- dplyr::filter(data_dl, fileDl == FALSE)

  if (verbose) {
    cat(nrow(df_to_dl), " datasets in total : ", nrow(data_already_exist), " already downloaded and ", nrow(data_to_download), " datasets to download\n")
  }

  # ---- Download --------------------------------------------------------------
  if (nrow(data_to_download) > 0) {
    # Create the destination directories if they do not exist
    for (target_dir in unique(dirname(data_to_download$destfile))) {
      dir.create(target_dir, recursive = TRUE, showWarnings = FALSE)
    }

    # Earthdata is the only supported source: check the login and read the
    # credentials from the session options.
    .testLogin(credentials)
    username <- getOption("earthdata_user")
    password <- getOption("earthdata_pass")

    if (verbose) {
      cat("Downloading the data...\n")
    }
    if (parallel) {
      # The default `detectCores() - 1` is 0 on a single-core machine and NA
      # when the core count is unknown; makeCluster() needs at least 1 worker.
      n_workers <- max(1L, num_workers, na.rm = TRUE)
      cl <- parallel::makeCluster(n_workers)
      # `finally` guarantees the workers are shut down even if a download fails
      tryCatch(
        parallel::clusterMap(
          cl, dl_func,
          url = data_to_download$url,
          output = data_to_download$destfile,
          username = username,
          password = password,
          .scheduling = "dynamic"
        ),
        finally = parallel::stopCluster(cl)
      )
    } else {
      for (i in seq_len(nrow(data_to_download))) {
        if (verbose) {
          cat("[", i, " over ", nrow(data_to_download), "]\n")
        }
        dl_func(
          url = data_to_download$url[i],
          output = data_to_download$destfile[i],
          username = username,
          password = password
        )
      }
    }

    # Deal with the case where not all the data were downloaded: retry the
    # missing / undersized files sequentially, a bounded number of times.
    pending <- which(!is_complete(data_to_download$destfile))
    attempt <- 0L
    while (length(pending) > 0 && attempt < max_retries) {
      attempt <- attempt + 1L
      if (verbose) {
        cat("Only part of the data has been downloaded. Downloading the remaining datasets one by one...\n")
      }
      for (i in pending) {
        # httr::write_disk() refuses to overwrite an existing file, so any
        # partial file must be removed before downloading again.
        if (file.exists(data_to_download$destfile[i])) {
          file.remove(data_to_download$destfile[i])
        }
        dl_func(
          url = data_to_download$url[i],
          output = data_to_download$destfile[i],
          username = username,
          password = password
        )
      }
      pending <- pending[!is_complete(data_to_download$destfile[pending])]
    }
  }

  # ---- Final status ----------------------------------------------------------
  # dlStatus: 1 = download ok ; 2 = download error ; 3 = already existing.
  # Statuses are computed after the retries so that the returned table and the
  # CSV summary describe the final state of the files on disk.
  data_dl <- data_to_download %>%
    dplyr::mutate(fileSize = file.size(destfile)) %>%
    dplyr::mutate(fileDl = !is.na(fileSize) & fileSize >= min_filesize) %>%
    dplyr::mutate(dlStatus = ifelse(fileDl, 1, 2)) %>%
    rbind(data_already_exist)

  n_failed <- sum(!data_dl$fileDl)
  if (n_failed > 0) {
    warning(
      n_failed, " dataset(s) are missing or smaller than min_filesize after ",
      max_retries, " additional download attempts. ",
      "See columns 'fileDl', 'dlStatus' and 'fileSize' of the output.\n"
    )
  } else if (verbose) {
    cat("\nData were all properly downloaded under the folder(s) ", paste(as.character(unique(dirname(df_to_dl$destfile))), collapse = " and "), "\n**To import the data in R, use the function modisfast::mf_import_data() rather than terra::rast() or stars::read_stars(). More info at help(mf_import_data)**\n")
  }

  # ---- Write the readme and the CSV summary ----------------------------------
  # `path` may not exist yet when there was nothing to download.
  dir.create(path, recursive = TRUE, showWarnings = FALSE)

  sentence <- paste0(
    "Query performed on the ", Sys.time(), "\n",
    "Use the function modisfast::mf_import_data() rather than terra::rast() or stars::read_stars() to import the data in R ! More info at help(mf_import_data)\n",
    "See the file Summary_downloaded_data.csv for more information on the data downloaded"
  )
  writeLines(sentence, file.path(path, "Readme.txt"))

  # Decode element-wise so each row keeps its own URL whether or not
  # URLdecode() is vectorised in the running R version.
  data_dl$url <- vapply(
    data_dl$url, utils::URLdecode, character(1),
    USE.NAMES = FALSE
  )
  utils::write.csv(data_dl, file.path(path, "Summary_downloaded_data.csv"), row.names = FALSE)

  return(data_dl)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.