R/rmcfs.io.R
In rmcfs: The MCFS-ID Algorithm for Feature Selection and Interdependency Discovery

Documented in read.adh read.adx write.adh write.adx write.arff

###############################
#read.adx
###############################
# d <- read.adx(paste0(dataPath,"/weather.adx"))
# showme(d)
# summary(d)
read.adx <- function(file = ""){
  data <- read.zip(file, 'adx')
  return(data)
}

###############################
#read.adh
###############################
read.adh <- function(file = ""){
  data <- read.zip(file, 'adh')
  return(data)
}

###############################
#read.zip
###############################
read.zip <- function(file, fileFormat){
  if(File.exists(file)){
    if(file.ext(file) == 'zip'){
      tmp_dir <- tempdir()
      utils::unzip(file, exdir = tmp_dir)
      file_path <- file.path(tmp_dir, paste0(drop.file.ext(basename(file)), ".", fileFormat))
    }else if(file.ext(file) == fileFormat){
      file_path <- file
    }else{
      stop(paste0("Incorrect file format. File: ", file))
    }
  }else{
    stop(paste0("File does not exist. File: ", file))
  }
  if(tolower(fileFormat) == 'adx'){
    data <- import.adx(file_path)
  }else if(tolower(fileFormat) == 'adh'){
    data <- import.adh(file_path)
  }else{
    data <- NULL
  }
  
  return(data)
}

###############################
#import.adx
###############################
# d <- read.adx(paste0(dataPath,"/weather.adx"))
# showme(d)
# summary(d)
import.adx <- function(file = "")
{
  conn <- file(file, "r", blocking = FALSE)
  dat <- readLines(file, warn = F)
  close(conn)
  dat <- unlist(lapply(dat,strsplit,"\\{"))
  dat <- unlist(lapply(dat,strsplit,"\\}"))
  #dat <- strsplit(dat,"\\{")
  #dat <- strsplit(dat,"\\}")
  dat <- string.trim(dat)
  dat <- dat[!string.starts.with(dat, "#", trim = T)]
  dat <- dat[dat!=""]
  attr_idx <- which(dat == 'attributes')
  event_idx <- which(dat == 'events')
  
  #if broken
  if(length(attr_idx)==0)
    stop(paste0("Attributes section is not defined. Missing 'attributes' keyword."))
  
  if(length(event_idx)==0)
    stop(paste0("Events/Objects section is not defined. Missing 'events' keyword."))
  
  if(attr_idx > event_idx)
    stop(paste0("Events section detected before Attributes section."))
  
  attr <- dat[(attr_idx+1):(event_idx-1)]
  events <- dat[(event_idx+1):length(dat)]
  dat <- NULL

  attr <- import.attributes(attr)
  if(length(events)<1)
    stop(paste0("Events are not defined. Number of events = 0"))

  events <- string.trim(events)
  events <- events[!string.starts.with(events, "#", trim = T)]
  events <- events[events!=""]
  events <- gsub('\t', ' ', events)
  events <- do.call(rbind, strsplit(events,','))
  events[events == "?"] <- NA
  events <- as.data.frame(events, stringsAsFactors = F)
  
  if(ncol(events) != nrow(attr)){
    stop(paste0("Number of columns in events section (",ncol(events),") does not correspond to attributes (",nrow(attr),") definition."))
  }
  
  names(events) <- attr$attr_names
  #and cast to numeric columns
  if(any(attr$numeric_cols)){
    events[attr$numeric_cols] <- apply(events[,attr$numeric_cols, drop = FALSE], 2, function(x) as.numeric(x))
  }
  #and cast to nominal columns
  if(any(attr$nominal_cols)){
    events[attr$nominal_cols] <- apply(events[,attr$nominal_cols, drop = FALSE], 2, function(x) as.character(x))
  }
  
  #move decision to the end if it is defined
  # if(any(attr$decision_cols)){
  #   events <- cbind(events[!attr$decision_cols], events[attr$decision_cols])
  # }
  
  events <- events[, !attr$ignore_cols]
  attr(events, 'attr_weights') <- attr$attr_weights
  attr(events, 'target') <- names(events)[attr$decision_cols]

  return(events)
}

###############################
#import.adh
###############################
import.adh <- function(file = "")
{
  conn <- file(file, "r", blocking = FALSE)
  dat <- readLines(file, warn = F)
  close(conn)
  attr <- import.attributes(dat)
  
  csvFileName <- paste0(sub('\\.adh', '', file),".csv")
  if(File.exists(csvFileName)){
    events <- load.csv(csvFileName, na.char = c('?', 'NA', 'NaN'))
  }else{
    stop(paste0("File does not exist. File: ", csvFileName))
  }
  if(ncol(events) != nrow(attr)){
    stop(paste0("Number of columns in csv file (",ncol(events),") does not correspond to adh (",nrow(attr),") header."))
  }
  
  names(events) <- attr$attr_names
  #cast to nominal columns
  if(any(attr$nominal_cols)){
    events[attr$nominal_cols] <- apply(events[,attr$nominal_cols, drop = F], 2, function(x) as.character(x))
  }
  
  events <- events[, !attr$ignore_cols]
  attr(events, 'attr_weights') <- attr$attr_weights
  attr(events, 'target') <- names(events)[attr$decision_cols]
  
  return(events)
}

###############################
#import.attributes
###############################
import.attributes <- function(attr)
{
  if(length(attr)<1)
    stop(paste0("Attributes are not defined. Number of attributes = 0"))

  attr <- string.trim(attr)
  attr <- attr[!string.starts.with(attr, "#", trim = T)]
  attr <- attr[attr!=""]
  attr <- gsub('\t',' ', attr)
  
  nominal_cols <- grepl(' nominal', attr)
  numeric_cols <- grepl(' numeric', attr)
  decision_cols <- grepl(' decision', attr)
  ignore_cols <- grepl(' ignore', attr)
  
  if(sum(decision_cols) > 1)
    stop(paste0("Multiple decision attributes detected. One is expected."))
  
  attr_names <- gsub(' nominal','',attr)
  attr_names <- gsub(' numeric','',attr_names)
  attr_names <- gsub(' decision','',attr_names)
  attr_names <- gsub(' ignore','',attr_names)
  attr_names[decision_cols] <- string.trim(unlist(strsplit(attr_names[decision_cols],'\\('))[1])
  attr_names <- gsub('"','',attr_names)
  attr_names <- gsub("'",'',attr_names)

  attr_names_tmp <- strsplit(attr_names,'\\s+')
  attr_names_tmp <- lapply(attr_names_tmp, function(x){if(length(x)<2) return(c(x,NA)) else return(x)})
  attr_names <- unlist(lapply(attr_names_tmp, function(x){string.trim(x[[1]])}))
  weights <- unlist(lapply(attr_names_tmp, function(x){string.trim(x[[2]])}))
  weights <- as.numeric(gsub("\\]",'',gsub('\\[','',weights)))
  weights[is.na(weights)] <- 1

  if(sum(nominal_cols & numeric_cols)>0)
    stop(paste0("Multiple type attributes detected. One type is expected. Attributes: ",
                paste0(attr_names[nominal_cols & numeric_cols],collapse = ', ')))

  if(sum(nominal_cols | numeric_cols) < length(attr))
    stop(paste0("Missing type attributes detected. One type is expected. Attributes: ",
                paste0(attr_names[!(nominal_cols | numeric_cols)], collapse = ', ')))
  
  ret.df <- data.frame(attr_names, nominal_cols, numeric_cols, ignore_cols, decision_cols, attr_weights = weights, 
                       stringsAsFactors = F)
  return(ret.df)
}

###############################
#write.adx
###############################
# data(alizadeh)
# d <- alizadeh
# d[5,1] <- Inf
# d[5,3] <- NA
# write.adx(d, file="~/ali_test.adx", chunk_size=2000)
write.adx <- function(x, file = "", target = NA, chunk_size = 100000, zip = FALSE)
{
  if(file != ""){
    if(file.ext(file) == "zip"){
      file <- paste0(drop.file.ext(file),".adx")
      zip <- TRUE
    }else if(file.ext(file) != "adx"){
      stop(paste0("Incorrect file name. File: ", file, ". Expected: '.adx'"))
    }
  }
  write.zip(x, file, target, chunk_size, zip, "adx")
}

###############################
#write.adh
###############################
# data(alizadeh)
# d <- alizadeh
# d[5,1] <- Inf
# d[7,2] <- Inf
# d[5,3] <- NA
# d[7,4] <- NA
# write.adh(d, file="~/ali_test.adh")
write.adh <- function(x, file = "", target = NA, chunk_size = 100000, zip = FALSE)
{
  if(file != ""){
    if(file.ext(file) == "zip"){
      file <- paste0(drop.file.ext(file),".adh")
      zip <- TRUE
    }else if(file.ext(file) != "adh"){
      stop(paste0("Incorrect file name File: ",file, ". Expected: '.adh'"))
    }
  }
  write.zip(x, file, target, chunk_size, zip, "adh")
}

###############################
#write.arff
###############################
# data(alizadeh)
# x <- alizadeh
# x$nominalAttr <- c(rep("val1",nrow(x)/2),rep("val2",nrow(x)))[1:nrow(x)]
# d[5,1] <- Inf
# d[7,2] <- Inf
# d[5,3] <- NA
# d[7,4] <- NA
# write.arff(x, file="~/ali_test.arff", chunk_size=2000)
write.arff <- function(x, file = "", target = NA, chunk_size = 100000, zip = FALSE)
{
  if(file != ""){
    if(file.ext(file) == "zip"){
      file <- paste0(drop.file.ext(file),".arff")
      zip <- TRUE
    }else if(file.ext(file) != "arff")
      stop(paste0("Incorrect file format. File: ",file, ". Expected: '.arff'"))
  }
  write.zip(x, file, target, chunk_size, zip, "arff")
}  

###############################
#write.zip
###############################
write.zip <- function(x, file, target, chunk_size, zip, fileFormat)
{
  fileName <- string.trim(file)
  if(fileName == ""){
    zip <- F
    fileDir <- ""
  }else{
    fileDir <- dirname(file)
  }
  
  if(zip){
    tmp_dir <- tempdir()
  }else{
    tmp_dir <- fileDir
  }
  
  dataFileName <- file.path(tmp_dir, basename(fileName))
  zipFileName <- file.path(fileDir, paste0(sub(paste0('\\.',fileFormat), '', basename(fileName)), '.zip'))
  
  if(fileFormat == "adx"){
    export.adx(x, dataFileName, target, chunk_size)
  }else if(fileFormat == "arff"){
    export.arff(x, dataFileName, target, chunk_size)
  }else if(fileFormat == "adh"){
    export.adh(x, dataFileName, target, chunk_size)
    csvFileName <- paste0(sub('\\.adh', '', dataFileName),".csv")
    dataFileName <- c(dataFileName, csvFileName)
  }
  
  if(zip){
    files2zip <- normalizePath(dataFileName)
    if(File.exists(zipFileName)){
      delete.files(zipFileName)
    }
    utils::zip(zipFileName, files = file.path(files2zip), flags = "-jq")
    delete.files(files2zip)
  }
}

###############################
#retrieve.attributes
###############################
retrieve.attributes <- function(x, target = NA)
{
  numericCols <- sapply(x[1,], is.numeric)
  
  if(is.na(target) & !is.null(attr(x, 'target')))
    target <- attr(x, 'target')
  if(is.character(target))
    target <- match(target, names(x))
  if(is.na(target) || target < 1 || target > ncol(x)){
    target <- ncol(x)
    warning(paste("Target attribute is not correctly defined - setting the last column as target attribute: ", names(x)[target]))
  }
  
  if( ('attr_weights' %in% names(attributes(x))) & (ncol(x) != length(attr(x, "attr_weights"))) )
    stop(paste0("Size (ncol=", ncol(x), ") of input data.frame does not match to length of weights vector(",
                length(attr(x, "attr_weights")),")"))
    
  attrHeader <- matrix(nrow = ncol(x), ncol = 4, byrow = FALSE)
  attrHeader[,1] <- paste0('"',names(x),'"')
  attrHeader[numericCols, 2] <- "numeric"
  attrHeader[!numericCols, 2] <- "nominal"
  if('attr_weights' %in% names(attributes(x))){
    attrHeader[, 3] <- paste0('[',attr(x, "attr_weights"),']')
  }else{
    attrHeader[, 3] <- rep('[1]', nrow(attrHeader))
  }
  attrHeader[target, 4] <- "decision"
  attrHeader[is.na(attrHeader[,4]),4] <- ""
  colnames(attrHeader) <- c('attribute', 'type', 'weight', 'role')
  return(attrHeader)
}

###############################
#export.adh
###############################
export.adh <- function(x, file = '', target = NA, chunk_size = 100000)
{
  if(file == ""){
    file <- stdout()
    adhFileName <- ''
    csvFileName <- ''
  }else if(is.character(file)) {
    if(tolower(file.ext(file)) != "adh")
      stop("Incorrect file extension *.adh expected.")
    adhFileName <- file
    csvFileName <- paste0(sub('\\.adh', '', adhFileName),".csv")
    file <- file(adhFileName, open = "wb")
  }
  
  if(!inherits(file, "connection"))
    stop("Argument 'file' must be a character string or connection.")
  
  if (!is.data.frame(x))
    x <- data.frame(x)
  
  verbose <- F
  if(as.numeric(ncol(x)) * nrow(x) > chunk_size)
    verbose <- T
  
  if(verbose)
    cat(paste0("Retrieving attributes meta info...\n"))
  attrHeader <- retrieve.attributes(x, target)
  attrHeader[,1] <- paste0(" ", attrHeader[,1])
  l <- apply(attrHeader, 1, paste, collapse = " ")
  
  ##### write attributes #####
  if(verbose)
    cat(paste0("Saving header...\n"))
  writeLines(l, file)
  #closeAllConnections()
  close(file)
  
  ##### write events #####
  if(verbose)
    cat(paste0("Saving data...\n"))
  save.csv(x, csvFileName, na.char = 'NA')
  
  if(verbose){
    cat("Data has been saved.\n")
  }
  return(TRUE)
}

###############################
#export.adx
###############################
export.adx <- function(x, file = '', target = NA, chunk_size = 100000)
{
  if(file == ''){
    file <- stdout()
  }else if(is.character(file)) {
    if(tolower(file.ext(file)) != "adx")
      stop("Incorrect file extension *.adx expected.")
    
    fileName <- file
    file <- file(file, open = "wb")
    on.exit(close(file))
  }
  
  if(!inherits(file, "connection"))
    stop("Argument 'file' must be a character string or connection.")
  
  if (!is.data.frame(x))
    x <- data.frame(x)
  
  verbose <- F
  if(as.numeric(ncol(x)) * nrow(x) > chunk_size)
    verbose <- T

  if(verbose)
    cat(paste0("Retrieving attributes meta info...\n"))
  attrHeader <- retrieve.attributes(x, target)
  attrHeader[,1] <- paste0(" ", attrHeader[,1])
  l <- apply(attrHeader, 1, paste, collapse = " ")
  
  ##### write attributes #####
  if(verbose)
    cat(paste0("Saving header...\n"))
  writeLines("attributes", file)
  writeLines("{", file)
  writeLines(l, file)
  writeLines("}", file)

  ###### write events ######
  writeLines("events", file)
  writeLines("{", file)
  export.events(x, file, header = FALSE, na.char = '?', chunk_size, verbose)
  writeLines("}", file)

  if(verbose){
    cat("\nData has been saved.\n")
  }
  return(TRUE)
}

###############################
#export.arff
###############################
export.arff <- function(x, file = "", target = NA, chunk_size = 100000)
{
  if(file == ""){
    file <- stdout()
  }else if(is.character(file)) {
    file <- file(file, "wb")
    on.exit(close(file))
  }
  
  if(!inherits(file, "connection"))
    stop("Argument 'file' must be a character string or connection.")
  
  #if (!is.data.frame(x) && !is.matrix(x))
  if (!is.data.frame(x))
    x <- data.frame(x)
  
  if(is.character(target)) 
    target <- match(target, names(x))
  if(is.na(target) || target < 1 || target > ncol(x)) 
    target <- ncol(x)
  
  if(target!=ncol(x)){
    #move decision column to the end to meet ARFF specification
    decisionName <- names(x)[target]
    x <- cbind(x[,-target], x[,target])
    target <- ncol(x)
    names(x)[target] <- decisionName
  }  

  verbose <- F
  if(as.numeric(ncol(x)) * nrow(x) > chunk_size)
    verbose <- T
  
  if(verbose)
    cat(paste0("Retrieving attributes meta info...\n"))
  numeric.cols <- sapply(x[1,], is.numeric)
  
  ## Write Attributes
  if(verbose)
    cat(paste0("Saving header...\n"))
  writeLines(paste0("@relation ",'"',names(x)[target],'"'), file)  
  writeLines("", file)
  
  attrHeader <- matrix(nrow = length(colnames(x)), ncol = 3, byrow = FALSE)
  attrHeader[,1] <- "@attribute"
  attrHeader[,2] <- paste0(" '", names(x), "'")
  attrHeader[numeric.cols, 3] <- " real"
  if(any(!numeric.cols)){
    attrHeader[!numeric.cols, 3] <- sapply(x[,!numeric.cols, drop=F], function(x) paste0(" {", paste(unique(x), collapse=","),"}"))
  }
  attrHeader[is.na(attrHeader[,3]),3] <- ""
  l <- apply(attrHeader, 1, paste, collapse="")
  cat(l, file=file, sep="\n")
  
  # convert input data to matrix
  x <- df.to.matrix(x, chunk_size, verbose)
  
  ## Write Events
  writeLines("", file)
  writeLines("@data", file)
  writeLines("", file)
  export.events(x, file, header = FALSE, na.char = '?', chunk_size, verbose)
  writeLines("", file)
  
  if(verbose){
    cat("\nData has been saved.\n")
  }
  return(TRUE)
}

###############################
#export.events
###############################
export.events <- function(x, file = '', header = FALSE, na.char = '?', chunk_size = 100000, verbose = FALSE)
{
  if(file == ''){
    file <- stdout()
  }else if(is.character(file)) {
    file <- file(file, "wb")
    on.exit(close(file))
  }

  if(!inherits(file, "connection"))
    stop("Argument 'file' must be a character string or connection.")
  
  if (!is.data.frame(x))
    x <- data.frame(x)
  
  cols.x <- as.numeric(ncol(x))
  rows.x <- as.numeric(nrow(x))
  
  # convert input data to matrix
  if(verbose)
    cat("Conversion of input data...\n") 
  x <- df.to.matrix(x, chunk_size, verbose)
  
  col.steps <- my.seq(chunk_size, cols.x, chunk_size, T)
  rstep <-  ceiling(chunk_size / cols.x)
  row.steps <- my.seq(rstep, rows.x, rstep, T)
  chunks <- length(col.steps) * length(row.steps)

  if(verbose)
    cat(paste0("Saving data...\n"))
  #write header
  if(header)
    cat(paste0(colnames(x), collapse=','), file=file, sep="\n")
  
  #write events
  chunk <- 1
  row.begin <- 1
  if(chunks > 1)
    pb <- utils::txtProgressBar(min = 1, max = chunks, style = 3)
  for(i in row.steps) {
    col.begin <- 1
    for(j in col.steps) {
      # m must be matrix type
      m <- x[row.begin:i, col.begin:j, drop=FALSE]
      m <- fix.matrix(m, na.char = na.char)
      l <- apply(m, 1, paste, collapse=",")
      
      if(j > col.steps[1])
        cat(',', file = file, sep="")
      
      if(length(l)>1 | j == cols.x){
        cat(l, file = file, sep="\n")
      }else{
        cat(l, file = file, sep="")
      }
      rm(m, l)
      col.begin <- j + 1
      chunk <- chunk + 1
    }
    row.begin <- i + 1

    #set the Progress Bar
    if(chunk >1 & verbose)
      utils::setTxtProgressBar(pb, chunk)
  }
  
  return(TRUE)
}
Any scripts or data that you put into this service are public.
rmcfs documentation built on Sept. 11, 2024, 8:41 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
rmcfs
The MCFS-ID Algorithm for Feature Selection and Interdependency Discovery

R/rmcfs.io.R
In rmcfs: The MCFS-ID Algorithm for Feature Selection and Interdependency Discovery

Defines functions export.events export.arff export.adx export.adh retrieve.attributes write.zip write.arff write.adh write.adx import.attributes import.adh import.adx read.zip read.adh read.adx

Documented in read.adh read.adx write.adh write.adx write.arff

Try the rmcfs package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

rmcfs The MCFS-ID Algorithm for Feature Selection and Interdependency Discovery

R/rmcfs.io.R In rmcfs: The MCFS-ID Algorithm for Feature Selection and Interdependency Discovery

Defines functions export.events export.arff export.adx export.adh retrieve.attributes write.zip write.arff write.adh write.adx import.attributes import.adh import.adx read.zip read.adh read.adx

Documented in read.adh read.adx write.adh write.adx write.arff

Try the rmcfs package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

rmcfs
The MCFS-ID Algorithm for Feature Selection and Interdependency Discovery

R/rmcfs.io.R
In rmcfs: The MCFS-ID Algorithm for Feature Selection and Interdependency Discovery