# R/superbid.R

# Defines functions superbid

# Documented in superbid

#' Superbid
#'
#' Superbid is a website that auctions various items, see
#' https://www.superbid.net/.
#'
#' @param type It must be one of the following: 'api' returns the IDs of each
#'     category, 'order' stores the IDs of the open offers whose end date is
#'     `order.Date`, and 'collect' downloads the data and images of the offers
#'     listed in the order stored under `collect.Date`. 'crontab' is accepted
#'     but currently performs no action. Any other value only prints a hint.
#' @param ID A vector containing the category API IDs queried by 'order'.
#' @param order.Date A Date. 'order' keeps only the offers whose `endDate`
#'     falls on this day.
#' @param collect.Date A Date. 'collect' reads the order document whose `_id`
#'     is `as.integer(collect.Date)`.
#'
#' @return For 'api', the first two columns of the row-bound category
#'     records. For every other `type`, `NULL` (invisibly).
#'
#' @section storage:
#'
#' The data is stored in the mongodb database 'superbid': collection 'order'
#' holds the offer IDs selected on a given day, 'data' holds the auctioned
#' item data, and the GridFS collections 'img.chunks' and 'img.files' hold one
#' gzip-compressed tarball of images per auctioned item.
#'
#' @export

superbid <- function(type, ID = c(10000, 10012, 10033, 10022, 10038),
                     order.Date = Sys.Date(), collect.Date = Sys.Date()) {

    # Unknown types only print a hint (no error is raised), exactly as before;
    # checking first avoids walking through every branch for nothing.
    if (!(type %in% c("crontab", "api", "order", "collect"))) {
        cat("You must provide one of the following arguments: 'api',",
            " 'order', 'collect' or 'crontab'.")
        return(invisible(NULL))
    }

    # --------------------------------------------------------------------------

    if (type == "api") {
        # Persisting the categories in a mongodb 'api' collection is currently
        # disabled; the table is only returned to the caller.
        url <- "https://api.sbwebservices.net/offer-query/categories/?portalId=2"
        out <- jsonlite::fromJSON(url, FALSE)
        out <- lapply(out$productsType, function(x) dplyr::bind_rows(x$categories))
        return(dplyr::bind_rows(out)[, 1:2])
    }

    # --------------------------------------------------------------------------

    if (type == "order") {
        mo <- mongolite::mongo(db = "superbid", collection = "order")
        # Close the connection even when a request or the insert fails.
        on.exit(mo$disconnect(), add = TRUE)

        # Default categories: cars, motorcycles, buses, trucks, vans.
        # NOTE: the categories to collect could be stored in mongodb instead.
        id <- lapply(ID, function(category) {
            url <- paste0("https://api.sbwebservices.net/offer-query/offers?portalId=2&filter=",
                          "statusId%3A1%3Bproduct.subCategory.category.id%3A", category,
                          "&searchType=opened&start=0&limit=10000&orderBy=lotNumber:asc")

            offers <- jsonlite::fromJSON(url, FALSE)$offers

            # vapply() always yields a logical vector, even when the category
            # has no offers; isTRUE() drops offers without a usable end date.
            keep <- vapply(offers, function(x) {
                isTRUE(as.Date(x$endDate) == order.Date)
            }, logical(1))

            lapply(offers[keep], function(x) x$id)
        })
        id <- unlist(id)

        if (length(id) > 0) {
            # NOTE(review): the document is keyed by today's date, not by
            # `order.Date`, while 'collect' looks it up by `collect.Date` --
            # confirm this is intended when `order.Date` is not today.
            new.order <- list("_id" = as.integer(Sys.Date()), id = id)
            new.order <- jsonlite::toJSON(new.order, auto_unbox = TRUE)
            mo$insert(new.order)
        }
        return(invisible(NULL))
    }

    # --------------------------------------------------------------------------

    if (type == "collect") {
        # Order collection, data collection and GridFS bucket for the images.
        # Each handle is released on exit, including on errors.
        mo <- mongolite::mongo(db = "superbid", collection = "order")
        on.exit(mo$disconnect(), add = TRUE)

        md <- mongolite::mongo(db = "superbid", collection = "data")
        on.exit(md$disconnect(), add = TRUE)

        fs <- mongolite::gridfs(db = "superbid", prefix = "img")
        on.exit(fs$disconnect(), add = TRUE)

        query <- paste0('{"_id":', as.integer(collect.Date), '}')
        df <- mo$find(query)

        if (nrow(df) == 0) {
            return(invisible(NULL))
        }

        # The offers are requested in batches of at most 15 IDs.
        ids <- df$id[[1]]
        batches <- split(ids, ceiling(seq_along(ids) / 15))

        data <- lapply(batches, function(batch) {
            url <- paste0(
                "https://api.sbwebservices.net/offer-query/offers?",
                "portalId=2&filter=id:[", paste0(batch, collapse = ","), "]"
            )
            jsonlite::fromJSON(url, simplifyVector = FALSE)$offers
        })
        data <- unlist(data, recursive = FALSE)

        # Rename the first field of every offer to `_id` so it becomes the
        # mongodb document key (assumes the first field is the offer id --
        # TODO confirm against the API response).
        data <- lapply(data, function(x) {
            names(x)[1] <- "_id"
            x
        })

        # Image URLs of each offer's gallery, paired with the offer id.
        links <- lapply(data, function(x) {
            urls <- unlist(lapply(x$product$gallery, function(y) y$link))
            urls <- urls[grepl("^https|^http", urls)]
            list(links = urls, id = x$`_id`)
        })

        # Store the pictures as {name: id.tar.gz}.
        # Work inside a private scratch directory instead of the shared
        # session temp dir, so only the downloaded images are archived and
        # removed, and restore the caller's working directory afterwards.
        workdir <- tempfile("superbid_")
        imgdir <- file.path(workdir, "img")
        dir.create(imgdir, recursive = TRUE)
        oldwd <- setwd(imgdir)
        on.exit({
            setwd(oldwd)
            unlink(workdir, recursive = TRUE)
        }, add = TRUE)

        for (x in links) {
            for (w in x$links) {
                # mode = "wb": images are binary and would otherwise be
                # corrupted on Windows.
                utils::download.file(w, basename(w), mode = "wb")
            }

            # The tarball lives one level above the directory being archived,
            # so the archive can never pick up itself.
            tarname <- paste0(x$id, ".tar.gz")
            tarpath <- file.path(workdir, tarname)
            utils::tar(tarfile = tarpath, compression = "gzip")

            fs$write(tarpath, tarname, "img")
            Sys.sleep(0.5)

            # Start the next offer from an empty image directory.
            unlink(c(tarpath, list.files()), recursive = TRUE)
        }

        for (x in data) {
            md$insert(jsonlite::toJSON(x, auto_unbox = TRUE, pretty = TRUE))
        }
        return(invisible(NULL))
    }

    # type == "crontab": accepted, but nothing is implemented for it here.
    invisible(NULL)
}
# Andryas/WEBDATA documentation built on Jan. 2, 2020, 1:31 p.m.