# R/types.R
#
# Defines functions checkSchemaInArrow specialtypeshandle

# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# types.R. This file handles the data type mapping between Spark and R

# Primitive data types. Each name is a Scala/SQL type and each value is the
# equivalent R type (or class vector). The mapping is kept in an environment,
# with an empty parent, so that types can be looked up by name.
PRIMITIVE_TYPES <- list2env(
  list(
    # Integral types.
    tinyint = "integer",
    smallint = "integer",
    int = "integer",
    bigint = "numeric",
    # Fractional types.
    float = "numeric",
    double = "numeric",
    decimal = "numeric",
    # Text, bytes and booleans.
    string = "character",
    binary = "raw",
    boolean = "logical",
    # Temporal types.
    timestamp = c("POSIXct", "POSIXt"),
    date = "Date",
    # The remaining entries are not SQL types returned by dtypes(). They are
    # listed here for usage by checkType() in schema.R.
    # TODO: refactor checkType() in schema.R.
    byte = "integer",
    integer = "integer"
  ),
  parent = emptyenv()
)

# Complex data types. None of them maps directly onto an R type, so every
# entry carries NA as a placeholder value.
COMPLEX_TYPES <- structure(
  list(NA, NA, NA),
  names = c("map", "array", "struct")
)

# All data types: the primitive and the complex entries combined into a single
# environment.
DATA_TYPES <- as.environment(append(as.list(PRIMITIVE_TYPES), COMPLEX_TYPES))

# Abbreviated labels: names are R types (plus the complex type names) and
# values are the short form of each.
SHORT_TYPES <- list2env(
  list(
    # R types.
    character = "chr",
    logical = "logi",
    integer = "int",
    numeric = "num",
    raw = "raw",
    Date = "Date",
    POSIXct = "POSIXct",
    # Complex types keep their full name.
    map = "map",
    array = "array",
    struct = "struct"
  ),
  parent = emptyenv()
)

# Mapping from R to Scala kept in an environment: each name is an R type and
# each value is the matching Scala type.
rToSQLTypes <- list2env(
  list(
    integer = "integer",   # in R, integer is 32bit
    numeric = "double",    # in R, numeric == double which is 64bit
    double = "double",
    character = "string",
    logical = "boolean"
  ),
  parent = emptyenv()
)

# Helper function for converting the decimal type. When the backend returns a
# column type in the format decimal(,) (e.g., decimal(10, 0)), this function
# converts the column type to the double type. It handles backend-returned types
# that are not keys of PRIMITIVE_TYPES but should be treated as PRIMITIVE_TYPES.
# Only the first element of `type` is examined; an empty or missing `type` is
# treated as "no match" rather than raising an error.
# @param type A type returned from the JVM backend.
# @return "double" when `type` has the decimal(...) form, otherwise NULL.
specialtypeshandle <- function(type) {
  # The parentheses are escaped so that they match literally. Left unescaped they
  # only form a capture group, which lets any string that merely starts with
  # "decimal" (e.g. "decimalfoo") pass as a decimal type.
  # isTRUE() turns the NA / zero-length results of grepl() into "no match".
  is_decimal <- isTRUE(grepl("^decimal\\(.+\\)$", type[1L]))
  if (is_decimal) "double" else NULL
}

# Helper function that checks supported types in Arrow. It stops with an error
# for the first field type of the schema that the Arrow optimization in R does
# not support.
# @param schema A structType object.
# @return NULL, invisibly, when no unsupported type is found.
checkSchemaInArrow <- function(schema) {
  stopifnot(inherits(schema, "structType"))

  if (!requireNamespace("arrow", quietly = TRUE)) {
    stop("'arrow' package should be installed.")
  }

  # vapply() guarantees a character vector even when the schema has no fields.
  # sapply() would return an empty list in that case, which startsWith() rejects
  # with a "non-character object(s)" error.
  field_strings <- vapply(
    schema$fields(),
    function(x) x$dataType.toString(),
    character(1)
  )

  # The cases below produce a corrupt value for unknown reason. It needs to be investigated.
  if (any(field_strings == "FloatType")) {
    stop("Arrow optimization in R does not support float type yet.")
  }
  if (any(field_strings == "BinaryType")) {
    stop("Arrow optimization in R does not support binary type yet.")
  }
  if (any(startsWith(field_strings, "ArrayType"))) {
    stop("Arrow optimization in R does not support array type yet.")
  }

  # Arrow optimization in Spark does not yet support both cases below.
  if (any(startsWith(field_strings, "StructType"))) {
    stop("Arrow optimization in R does not support nested struct type yet.")
  }
  if (any(startsWith(field_strings, "MapType"))) {
    stop("Arrow optimization in R does not support map type yet.")
  }

  invisible(NULL)
}

# Try the SparkR package in your browser
#
# Any scripts or data that you put into this service are public.
#
# SparkR documentation built on June 3, 2021, 5:05 p.m.