xlsxToR: xlsxToR

Usage Arguments Examples

View source: R/xlsxToR.r

Usage

1
xlsxToR(file, keep_sheets = NULL, header = FALSE)

Arguments

file
keep_sheets
header

Examples

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
##---- Should be DIRECTLY executable !! ----
##-- ==>  Define data, use random,
##--	or do  help(data=index)  for the standard data sets.

## The function is currently defined as
function (file, keep_sheets = NULL, header = FALSE) 
{
    require(XML)
    require(plyr)
    require(pbapply)
    suppressWarnings(file.remove(tempdir()))
    file.copy(file, tempdir())
    new_file <- list.files(tempdir(), full.name = TRUE, pattern = basename(file))
    new_file_rename <- gsub("xlsx$", "zip", new_file)
    file.rename(new_file, new_file_rename)
    unzip(new_file_rename, exdir = tempdir())
    mac <- xmlToList(xmlParse(list.files(paste0(tempdir(), "/docProps"), 
        full.name = TRUE, pattern = "app.xml")))
    mac <- grepl("Macintosh", mac$Application)
    if (mac) {
        os_origin <- "1899-12-30"
    }
    else {
        os_origin <- "1899-12-30"
    }
    sheet_names <- xmlToList(xmlParse(list.files(paste0(tempdir(), 
        "/xl"), full.name = TRUE, pattern = "workbook.xml")))
    sheet_names <- do.call("rbind", sheet_names$sheets)
    rownames(sheet_names) <- NULL
    sheet_names <- as.data.frame(sheet_names, stringsAsFactors = FALSE)
    sheet_names$id <- gsub("\D", "", sheet_names$id)
    styles <- xmlToList(xmlParse(list.files(paste0(tempdir(), 
        "/xl"), full.name = TRUE, pattern = "styles.xml")))
    styles <- styles$cellXfs[sapply(styles$cellXfs, function(x) any(names(x) == 
        "applyNumberFormat"))]
    styles <- do.call("rbind", lapply(styles, function(x) as.data.frame(as.list(x[c("applyNumberFormat", 
        "numFmtId")]), stringsAsFactors = FALSE)))
    if (!is.null(keep_sheets)) {
        sheet_names <- sheet_names[sheet_names$name %in% keep_sheets, 
            ]
    }
    worksheet_paths <- list.files(paste0(tempdir(), "/xl/worksheets"), 
        full.name = TRUE, pattern = paste0("sheet(", paste(sheet_names$id, 
            collapse = "|"), ")\.xml$"))
    worksheets <- lapply(worksheet_paths, function(x) xmlRoot(xmlParse(x))[["sheetData"]])
    worksheets <- pblapply(seq_along(worksheets), function(i) {
        x <- xpathApply(worksheets[[i]], "//x:c", namespaces = "x", 
            function(node) {
                c(v = xmlValue(node[["v"]]), xmlAttrs(node))
            })
        if (length(x) > 0) {
            x_rows <- unlist(lapply(seq_along(x), function(i) rep(i, 
                length(x[[i]]))))
            x <- unlist(x)
            x <- reshape(data.frame(row = x_rows, ind = names(x), 
                value = x, stringsAsFactors = FALSE), idvar = "row", 
                timevar = "ind", direction = "wide")
            x$sheet <- sheet_names[sheet_names$id == i, "name"]
            colnames(x) <- gsub("^value\.", "", colnames(x))
        }
        x
    })
    worksheets <- do.call("rbind.fill", worksheets[sapply(worksheets, 
        class) == "data.frame"])
    entries <- xmlParse(list.files(paste0(tempdir(), "/xl"), 
        full.name = TRUE, pattern = "sharedStrings.xml$"))
    entries <- xpathSApply(entries, "//x:t", namespaces = "x", 
        xmlValue)
    names(entries) <- seq_along(entries) - 1
    entries_match <- entries[match(worksheets$v, names(entries))]
    worksheets$v[worksheets$t == "s" & !is.na(worksheets$t)] <- entries_match[worksheets$t == 
        "s" & !is.na(worksheets$t)]
    worksheets$cols <- match(gsub("\d", "", worksheets$r), LETTERS)
    worksheets$rows <- as.numeric(gsub("\D", "", worksheets$r))
    if (!any(grepl("^s$", colnames(worksheets)))) {
        worksheets$s <- NA
    }
    workbook <- lapply(unique(worksheets$sheet), function(x) {
        y <- worksheets[worksheets$sheet == x, ]
        y_style <- as.data.frame(tapply(y$s, list(y$rows, y$cols), 
            identity), stringsAsFactors = FALSE)
        y <- as.data.frame(tapply(y$v, list(y$rows, y$cols), 
            identity), stringsAsFactors = FALSE)
        if (header) {
            colnames(y) <- y[1, ]
            y <- y[-1, ]
            y_style <- y_style[-1, ]
        }
        y_style <- sapply(y_style, function(x) ifelse(length(unique(x)) == 
            1, unique(x), NA))
        if (length(styles) > 0) {
            y_style <- styles$numFmtId[match(y_style, styles$applyNumberFormat)]
        }
        y_style[y_style %in% 14:17] <- "date"
        y_style[y_style %in% c(18:21, 45:47)] <- "time"
        y_style[y_style %in% 22] <- "datetime"
        y_style[is.na(y_style) & !sapply(y, function(x) any(grepl("\D", 
            x)))] <- "numeric"
        y_style[is.na(y_style)] <- "character"
        y[] <- lapply(seq_along(y), function(i) {
            switch(y_style[i], character = y[, i], numeric = as.numeric(y[, 
                i]), date = as.Date(as.numeric(y[, i]), origin = os_origin), 
                time = strftime(as.POSIXct(as.numeric(y[, i]), 
                  origin = os_origin), format = "%H:%M:%S"), 
                datetime = as.POSIXct(as.numeric(y[, i]), origin = os_origin))
        })
        y
    })
    if (length(workbook) == 1) {
        workbook <- workbook[[1]]
    }
    workbook
  }

gvdovandzung/thongke documentation built on May 23, 2020, 12:35 a.m.