
Defines function read_data

Documented in read_data

#' Read text data from a CSV file, a tab-delimited file, or a folder of files.
#'
#' @param filepath Path to the CSV file, the delimited file, or (for
#'   type "folder") the directory holding one document per file.
#' @param type One of "csv", "delim" or "folder". When omitted, the first
#'   choice ("csv") is used.
#' @param index Path to a header-less CSV file whose first column holds
#'   document file names and whose second column holds their labels.
#'   Required when type is "folder".
#' @param ... Extra arguments forwarded to read.csv() (type "csv") or
#'   read.delim() (type "delim").
#' @return A data frame. For type "folder" it has the columns Text.Data
#'   (the lines of each document joined with newlines) and Labels, with the
#'   indexed documents first, in index order. Files present in the folder
#'   but absent from the index follow with NA labels.
read_data <-
function(filepath, type=c("csv","delim","folder"), index=NULL, ...) {
    # Collapse the choice vector to one validated string; comparing the
    # length-3 default inside if() is an error in current R.
    type <- match.arg(type)

    if (type == "csv") {
        data <- read.csv(filepath, ...)      # CSV files
    } else if (type == "delim") {
        data <- read.delim(filepath, ...)    # tab-delimited files
    } else {
        # match.arg() guarantees that the only remaining choice is "folder".
        if (is.null(index)) stop("Must supply an index if using the folder option.")
        labels <- read.csv(index, header=FALSE)
        files <- list.files(path=filepath, full.names=TRUE)

        # Locate every indexed document by base name. The lookup runs
        # against the reversed listing so that, as in a full scan without
        # early exit, the last file with a matching name wins.
        doc_names <- as.character(labels[, 1])
        pos <- length(files) + 1L - match(doc_names, rev(basename(files)))
        if (anyNA(pos)) {
            stop("Could not find corresponding file from index file in folder.")
        }

        # Read one document and join its lines into a single string.
        read_doc <- function(path) paste(readLines(path), collapse="\n")
        frame <- vapply(files[pos], read_doc, character(1), USE.NAMES=FALSE)
        doc_labels <- labels[, 2]

        if (nrow(labels) > length(files)) {
            stop("There are more labels than documents in the index file.")
        } else if (nrow(labels) < length(files)) {
            # NOTE(review): the NA padding of the labels implies that files
            # missing from the index are meant to be kept as unlabeled
            # documents, so their text is read as well - confirm.
            unlisted <- setdiff(seq_along(files), pos)
            extra <- vapply(files[unlisted], read_doc, character(1),
                            USE.NAMES=FALSE)
            frame <- c(frame, extra)
            # Indexing past the end pads with NA (also valid for factors),
            # keeping the labels the same length as the texts.
            doc_labels <- doc_labels[seq_along(frame)]
        }
        data <- data.frame(Text.Data=frame, Labels=doc_labels)
    }

    data
}

Try the RTextTools package in your browser

Any scripts or data that you put into this service are public.

RTextTools documentation built on April 26, 2020, 9:05 a.m.