R/monthlyData_download.R

##' monthlyData_download
##'
##' Download plpd, bnf, demog and qof data in csv format and save them in the data directory as generated by the \emph{dirsGen()} function
##'
##' @param settings settings object created via the \emph{setConfig()} function, containing paths, month and year of analysis, and file names.
##' @param whichData character vector indicating which data to download. One or more of "plpd", "bnf", "demog", "qof" and "all";
##' default = "all".
##' @param sample logical. If TRUE (default), a sample file of only 500,000 rows is downloaded from the GitHub repository;
##' when FALSE, the complete data are downloaded from the NHS Digital website.
##' 
##' @details Checks whether the data have already been downloaded and, if not, downloads all the data needed for a one-month analysis.
##' Data are imported from the following sources:
##'  - plpd: GitHub when sample = TRUE / NHS Digital website when sample = FALSE
##'  - bnf: GitHub/csv
##'  - demog: GitHub/csv
##'  - qof: GitHub/csv
##' The GitHub repository is available at \emph{https://github.com/muschitiello/PrescRiptionsData}.
##' To save time, the function first checks whether the data are already available in the corresponding path;
##' if the data have already been downloaded, a message is returned saying that the files are already available.
##' 
##' @seealso \code{\link{monthlyData_import}}
##' 
##' @return a confirmation string; the data are downloaded into the \emph{dataInput} folder, under the corresponding subfolders
##' 
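##' @examples
##' \dontrun{
##' ## Minimal usage sketch (not run). A `settings` object must first be
##' ## created with setConfig(); see that function's documentation for its
##' ## exact arguments.
##' monthlyData_download(settings, whichData = "all", sample = TRUE)
##'
##' ## download only the demog and qof files
##' monthlyData_download(settings, whichData = c("demog", "qof"), sample = TRUE)
##' }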
##' @export
##'


monthlyData_download = function(settings, whichData = "all", sample){
  
  # default to the SAMPLE data when the sample argument is not supplied
  if(missing(sample)){
    message("sample argument not specified, SAMPLE DATA will be downloaded")
    sample = TRUE
  }
  
  dirs = dirsGen(settings)
  month=as.character(stringr::str_pad(settings$month,width = 2,side = "left",pad = "0"))
  year = settings$year
  rootdir = settings$rootdir
  
  whichDataAll = c("all","plpd","bnf","demog","qof")
  
  if(any(!whichData %in% whichDataAll)){
    stop(paste0("Invalid whichData value. Allowed values are: ",
                paste0('"', whichDataAll, '"', collapse = ", ")))
  }
  
  # if "all" appears together with other values, treat the request as "all"
  if(length(whichData) > 1 && "all" %in% whichData){
    whichData = "all"
  }
  
  if(length(whichData) == 1 && whichData == "all"){
    plpdTF = TRUE
    bnfTF = TRUE
    demogTF = TRUE
    qofTF = TRUE
  }else{
    plpdTF = "plpd" %in% whichData
    bnfTF = "bnf" %in% whichData
    demogTF = "demog" %in% whichData
    qofTF = "qof" %in% whichData
  }
  
  ##############################################################
  #### plpd  source
  
  plpdurl = paste0("plpd",year,month)
  prefix = paste0(year,month)
  folder = paste0("plpd_",prefix)
  sampleFolder = paste0("plpd_",prefix,"_SAMPLE")
  outF = "csv"
  
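  # map the year+month key to the NHS Digital zip archive URL for that month's
  # full plpd extract (used only when sample = FALSE)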
  url = switch(plpdurl,
               # Data URL
               "plpd201801" = "https://files.digital.nhs.uk/11/1E8A59/2018_01_Jan.zip",
               "plpd201802" = "https://files.digital.nhs.uk/35/292E2A/2018_02_Feb.zip",
               "plpd201803" = "https://files.digital.nhs.uk/6F/CE775A/2018_03_Mar.zip",
               "plpd201804" = "https://files.digital.nhs.uk/E3/801EA5/2018_04_Apr.zip",
               "plpd201805" = "https://files.digital.nhs.uk/B0/B15E0B/2018_05_May.zip",
               "plpd201806" = "https://files.digital.nhs.uk/07/697711/2018_06_Jun.zip",
               "plpd201807" = "https://files.digital.nhs.uk/7E/FC3950/2018_07_Jul.zip",
               "plpd201808" = "https://files.digital.nhs.uk/43/C6644B/2018_08_Aug.zip",
               "plpd201809" = "https://files.digital.nhs.uk/5C/FE61C4/2018_09_Sep.zip",
               "plpd201810" = "https://files.digital.nhs.uk/33/3EE982/2018_10_Oct.zip",
               "plpd201811" = "https://files.digital.nhs.uk/96/A7878A/2018_11_Nov.zip",
               "plpd201812" = "https://files.digital.nhs.uk/94/405A94/2018_12_Dec.zip",
               "plpd201901" = "https://files.digital.nhs.uk/EC/D8DF0F/2019_01_Jan.zip",
               "plpd201902" = "https://files.digital.nhs.uk/6E/DD7444/2019_02_Feb.zip",
               "plpd201903" = "https://files.digital.nhs.uk/49/2988AF/2019_03_MarV2.zip",
               "plpd201904" = "https://files.digital.nhs.uk/85/3C25E0/2019_04_APR.zip",
               "plpd201905" = "https://files.digital.nhs.uk/CA/9D94E7/2019_05_May.zip",
               "plpd201906" = "https://files.digital.nhs.uk/9B/047263/2019_06_Jun.zip",
               "plpd201907" = "https://files.digital.nhs.uk/B9/369C6C/2019_07_Jul.zip",
               "plpd201908" = "https://files.digital.nhs.uk/50/A23B13/2019_08_Aug.zip",
               "plpd201909" = "https://files.digital.nhs.uk/E4/D720CC/2019_09_Sep.zip",
               "plpd201910" = "https://files.digital.nhs.uk/0B/E73CF4/2019_10_Oct.zip",
               "plpd201911" = "https://files.digital.nhs.uk/66/972E65/2019_11_Nov.zip",
               "plpd201912" = "https://files.digital.nhs.uk/5A/CA6C2E/2019_12_Dec.zip"
  )
  
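  # plpd download: the 500,000-row SAMPLE zip from the GitHub repository when
  # sample = TRUE, otherwise the full monthly zip from the NHS Digital URL above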
  if(plpdTF){
    message(paste(folder))
    if(sample){
      if(dir.exists(paste0(dirs$plpdRootDirSample,"/",sampleFolder)) & length(list.files(paste0(dirs$plpdRootDirSample,"/",sampleFolder)))==3){
        message(paste0("SAMPLE Files already exist"))
      }else{
        message("download")
        
        suppressWarnings(dir.create(paste0(dirs$plpdRootDirSample,"/",sampleFolder),recursive = T))
        # create the temporary file
        if(Sys.getenv("TEMP") != ""){
          td = file.path(Sys.getenv("TEMP"), "PrescRtemp")
        }else{
          td = file.path(getwd(), "PrescRtemp")
        }
        on.exit(unlink(td))
        tf = tempfile(tmpdir = td,fileext = ".zip")
        if(!dir.exists(td)){
          dir.create(td)
        }
        # define url from GH
        
        url = paste0(dirs$urlGH,dirs$gitPlpdZipDir,"/",sampleFolder,".zip")
        
        
        # download zip
        utils::download.file(url = url, destfile = tf)
        # unzip in temporary folder
        unzip(zipfile = tf, exdir = dirs$plpdRootDirSample)
        
        # rm tmp file
        unlink(tf)
        unlink(td,recursive = T)
        
        
      }
      
    }else{
      
      
      # check if file already exists
      # if not, create folder
      if(dir.exists(paste0(dirs$plpdRootDir,folder)) & length(list.files(paste0(dirs$plpdRootDir,folder)))==3){
        message(paste0("Files already exist"))
      }else{
        message("download")
        
        suppressWarnings(dir.create(paste0(dirs$plpdRootDir,folder),recursive = T))
        
        # create the temporary file
        if(Sys.getenv("TEMP") != ""){
          td = file.path(Sys.getenv("TEMP"), "PrescRtemp")
        }else{
          td = file.path(getwd(), "PrescRtemp")
        }
        on.exit(unlink(td))
        tf = tempfile(tmpdir = td,fileext = ".zip")
        if(!dir.exists(td)){
          dir.create(td)
        }
        utils::download.file(url = url, destfile = tf)
        # unzip in temporary folder
        unzip(zipfile = tf, exdir = paste0(dirs$plpdRootDir,folder))
        
        plpdFiles = list.files(paste0(dirs$plpdRootDir,folder))[which(
          grepl("ADDR|CHEM|PDPI|addr|chem|pdpi",list.files(paste0(dirs$plpdRootDir,folder))))]
        
        # rename the extracted files to the standard addr_/chem_/pdpi_<yyyymm>.csv names
        for(j in plpdFiles){
          if(grepl("ADDR|addr",j)){
            path = paste0(dirs$plpdRootDir,folder,"/","addr_",prefix,".",outF)
            if(!file.rename(paste0(dirs$plpdRootDir,folder,"/",j),path)){
              warning(paste0("Could not rename ",j))
            }
          }
          if(grepl("CHEM|chem",j)){
            path = paste0(dirs$plpdRootDir,folder,"/","chem_",prefix,".",outF)
            if(!file.rename(paste0(dirs$plpdRootDir,folder,"/",j),path)){
              warning(paste0("Could not rename ",j))
            }
          }
          if(grepl("PDPI|pdpi",j)){
            path = paste0(dirs$plpdRootDir,folder,"/","pdpi_",prefix,".",outF)
            if(!file.rename(paste0(dirs$plpdRootDir,folder,"/",j),path)){
              warning(paste0("Could not rename ",j))
            }
          }
          
        }
        
        # rm tmp file
        unlink(tf)
        unlink(td,recursive = T)
        
      }
      
    }
  }
  ##############################################################
  #### bnf, demog, qof from github
  
  # choose the bnf file version for the analysis year
  bnffile = switch(as.character(year),
                   "2018" = "bnf_201901.csv",
                   "2019" = "bnf_202001.csv")
  
  # choose the biennium for the qof files
  qoffolder = switch(as.character(year),
                     "2018" = "qof_1819",
                     "2019" = "qof_1819")
  qofprefix = switch(as.character(year),
                     "2018" = "qofGP_1819",
                     "2019" = "qofGP_1819")
  
  # define qof file names
  qofFiles = paste0(qofprefix,c("_CardioVascular","_dependency",
                                "_lifestyle","_mental","_muscul","_respiratory"),".csv")
  
  # define subfolder name
  inFolder = c("/",paste0("/demog_",prefix,"/"),paste0("/",qoffolder,"/"))
  
  # define final folder name for data to be downloaded
  inFolderFinal = paste0(dirs$gitHubCsvDir,inFolder)
  outFolderFinal = gsub("/csv","",inFolderFinal)
  
  # add filenames to path
  inFiles = list( bnffile,c(paste0("demog_",prefix,".csv"),paste0("demogMap_",prefix,".csv")),
                  qofFiles)
  
  
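  # name each element of inFiles after its source folder on GitHub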
  if(is.list(inFiles)){
    names(inFiles) = inFolderFinal
  }
  
  if(any(bnfTF,demogTF,qofTF)){
    # keep only the requested list elements (1 = bnf, 2 = demog, 3 = qof)
    filesN = which(c(bnfTF, demogTF, qofTF))
    
    
    inFiles = inFiles[filesN]
    inFolderFinal = inFolderFinal[filesN]
    
    # final download 
    for (i in 1:length(inFiles)){
      for (j in inFiles[[i]]){
        message(j)
        if(file.exists(paste0(dirs$inputdir,outFolderFinal[i],j))){
          message("Files already exist")
        }else{
          message("download")
          if(!dir.exists(paste0(dirs$inputdir,outFolderFinal[i]))){
            dir.create(paste0(dirs$inputdir,outFolderFinal[i]),recursive = T)
          }
          download.file(url = paste0(dirs$urlGH,inFolderFinal[i],j),
                        destfile = paste0(dirs$inputdir,outFolderFinal[i],j))
          
          if(grepl("bnf",j)){
            bnf = read.csv(paste0(dirs$inputdir,outFolderFinal[i],j),sep = ",",stringsAsFactors = FALSE)
            write.csv2(bnf,paste0(dirs$inputdir,outFolderFinal[i],j),row.names = F)
          }
          
          if(grepl("demogMap",j)){
            demogMap = read.csv(paste0(dirs$inputdir,outFolderFinal[i],j),sep = ";",stringsAsFactors = FALSE)
            demogMap=checkDemogMap(demogMap,settings)
            write.csv2(demogMap,paste0(dirs$inputdir,outFolderFinal[i],j),row.names = F)
          }
          
        }
      }
    }
  }
  
  return("All Files Downloaded")
  
}