R/main.R

Defines functions processSheets getSheets sheetIsAYear getODS exportCSV getDataTable

Documented in exportCSV getDataTable

#' @import magrittr
#' @importFrom magrittr "%>%"

# gov.uk landing page that getODS() reads to locate the VEH0122 download link.
url.allvehicles <-"https://www.gov.uk/government/statistical-data-sets/all-vehicles-veh01"
# Paths used for the raw download and the exported copies, each resolved
# through system.file() against the VEH0122 package.
# NOTE(review): system.file() yields "" when the requested file is not found,
# so these look like they only resolve once the files already exist - confirm
# that this is intended for the paths that are written to.
# NOTE(review): the "inst/" prefix on the CSV/TSV paths presumably only matches
# a source checkout, not an installed package - verify.
file_downloadpath <- system.file("raw-data/veh0122.ods", package="VEH0122")
file_exportRDSpath <- system.file("data/veh0122.RDS", package="VEH0122")
file_exportCSVpath <- system.file("inst/extdata/veh0122.csv", package="VEH0122")
# file_exportTSVpath is not referenced by the functions visible in this file.
file_exportTSVpath <- system.file("inst/extdata/veh0122.tsv", package="VEH0122")

#' Vehicle Ownership statistics downloader & formatter.
#' @description Download VEH0122 stats on vehicle ownership from gov.uk,
#' reformat into a single data table & save locally as an RDS file. Currently
#' includes cars only in assembled file.
#' @param download `FALSE` (default) = use the locally-saved source VEH0122.ods
#' file when there is one, and download only if it is missing.
#' `TRUE` = always download the file from the main url.
#' @param get_from_RDS When the local .ods file is being used, read a
#' locally-saved VEH0122.RDS data.frame if it exists instead of re-processing
#' the sheets.
#' @param type "Cars", "Motorcycles", or "Other". Currently not used by this
#' function.
#' @return Returns the data frame (the global `df_global`).
getDataTable <- function(download=FALSE, get_from_RDS=TRUE, type="Cars") {
    # Rebuild df_global from the .ods sheets and cache the result as RDS.
    processAndSave <- function() {
        processSheets()
        saveRDS(df_global, file = file_exportRDSpath)
    }

    have_local_ods <- file.exists(file_downloadpath)

    if (download == FALSE && have_local_ods) {
        if (get_from_RDS && file.exists(file_exportRDSpath)) {
            # Cheapest path: reuse the previously assembled table.
            df_global <<- readRDS(file_exportRDSpath)
        } else {
            processAndSave()
        }
    } else {
        # Either a download was requested, or there is no local source file.
        # Previously the second case fell through without loading anything,
        # so the return below failed or handed back stale data.
        if (download == FALSE) {
            message("No local VEH0122.ods found; downloading it.")
        }
        getODS()
        processAndSave()
    }

    print("finished")
    return(df_global)
}

#' Store a VEH0122.csv file locally.
#' @description Writes the VEH0122 data table to the CSV export path.
#' The table is taken from the first available source: the `df_global`
#' object already in memory, the saved VEH0122.RDS file, or a call to
#' getDataTable(). With refresh=TRUE the data is always re-downloaded first.
#' @param refresh Download the latest .ods file even if we have
#' a local copy. Default is FALSE (i.e. look for already-loaded or
#' locally-saved data first).
exportCSV <- function(refresh=FALSE) {
    if (refresh != FALSE) {
        # Honour the documented meaning of refresh: force a fresh download.
        df_global <<- getDataTable(download = TRUE)
    } else if (exists("df_global") && !is.null(df_global)) {
        # exists() guards against df_global never having been created, which
        # made the bare is.null() check fail with "object not found".
        print("using pre-existing VEH0122 data object")
    } else if (file.exists(file_exportRDSpath)) {
        df_global <<- readRDS(file_exportRDSpath)
    } else {
        df_global <<- getDataTable()
    }
    write.table(x = df_global, file = file_exportCSVpath, sep = ",",
                row.names = FALSE)
}

# Find the VEH0122 attachment link on the gov.uk "all vehicles" page
# (url.allvehicles) and download that file to file_downloadpath.
# Stops with an error when the page contains no matching link.
# Returns the value of download.file().
getODS <- function() {
    html.allvehicles <- xml2::read_html(url.allvehicles)
    # NOTE(review): the predicate [".attachment-inline"] is a non-empty string
    # literal, which XPath treats as always true, so it does not filter on the
    # CSS class - confirm whether a class test was intended.
    links <- rvest::html_nodes(html.allvehicles,
                               xpath='//h2[@id="licensed-vehicles"]/../p[starts-with(text(),"VEH0122")]/span[".attachment-inline"]/a[@href]')
    hrefs <- rvest::html_attr(links, "href")
    hrefs <- hrefs[!is.na(hrefs) & nzchar(hrefs)]
    if (length(hrefs) == 0L) {
        stop("Could not find a VEH0122 download link on ", url.allvehicles,
             call. = FALSE)
    }
    # Use the first match only (download.file needs a single url) and resolve
    # it against the page address in case the href is relative.
    urltodownload <- xml2::url_absolute(hrefs[1L], url.allvehicles)
    # The .ods file is a binary archive; mode = "wb" keeps it intact on Windows.
    download.file(url=urltodownload, destfile=file_downloadpath, mode="wb")
}

# Decide whether a sheet name denotes a year.
# sheet: a single sheet name (character) or number.
# Returns TRUE when sheet is exactly four digits with no leading zero
# (e.g. "2019" or 2019), FALSE for anything else.
sheetIsAYear <- function(sheet) {
    # Anything other than one non-missing value cannot name a year sheet.
    if (length(sheet) != 1L || is.na(sheet)) {
        return(FALSE)
    }
    # A pattern match avoids as.integer(): no coercion warning (or NA in the
    # condition) for ordinary names such as "Index", and look-alikes such as
    # "-123" or "1e03" are no longer accepted as years.
    grepl("^[1-9][0-9]{3}$", as.character(sheet))
} # function sheetIsAYear

# List the sheets of the downloaded workbook (file_downloadpath) whose names
# pass sheetIsAYear(), in sorted order. Yields sort(NULL) when no sheet
# qualifies.
getSheets <- function() {
    sheet_names <- readODS::ods_sheets(path=file_downloadpath)
    keep <- vapply(sheet_names, sheetIsAYear, logical(1), USE.NAMES = FALSE)
    year_names <- if (any(keep)) unname(sheet_names[keep]) else NULL
    return(sort(year_names))
    } # function getSheets

# Run processSheet() on every year sheet returned by getSheets(), in that
# order, then rename the first column of the global df_global to "Postcodes".
# NOTE(review): processSheet() is not defined in the visible part of this
# file; presumably it adds each sheet's data to df_global - confirm.
processSheets <- function() {
    for (year_sheet in getSheets()) {
        processSheet(year_sheet)
    }
    colnames(df_global)[1] <<- "Postcodes"
    }
mihamn/VEH0122 documentation built on July 11, 2020, 3:45 a.m.