Nothing
#' Convert between Python and R objects
#'
#' @inheritParams import
#' @param x A Python object.
#'
#' @return An \R object, as converted from the Python object.
#'
#' @name r-py-conversion
#' @export
r_to_py <- function(x, convert = FALSE) {
ensure_python_initialized()
UseMethod("r_to_py")
}
#' @rdname r-py-conversion
#' @export
py_to_r <- function(x) {
if(!is_py_object(x <- py_to_r_cpp(x))) # simple obj
return(x)
# Note, the new `x` here (with convert=TRUE) won't be visible to S3 methods
# UseMethod() doesn't allow modifying the original call args
UseMethod("py_to_r")
}
#' @export
r_to_py.default <- function(x, convert = FALSE) {
r_to_py_impl(x, convert = convert)
}
#' @export
py_to_r.default <- function(x) {
py_to_r_cpp(x)
}
#' R wrapper for Python objects
#'
#' S3 method to create a custom R wrapper for a Python object.
#' The default wrapper is either an R environment or an R function
#' (for callable python objects).
#'
#' @param x Python object
#'
#' @keywords internal
#'
#' @export
py_to_r_wrapper <- function(x) {
UseMethod("py_to_r_wrapper")
}
#' @export
py_to_r_wrapper.default <- function(x) {
x
}
#' @export
r_to_py.factor <- function(x, convert = FALSE) {
if (inherits(x, "ordered"))
warning("converting ordered factor to character; ordering will be lost")
r_to_py_impl(as.character(x), convert = convert)
}
#' @export
r_to_py.POSIXt <- function(x, convert = FALSE) {
tz <- attr(x, "tzone", TRUE)
## POSIXlt tzone is a length-3 vec, where "" means local/missing tzone
if(length(tz))
tz <- tz[[1L]] %""% NULL
# we prefer numpy datetime64 for efficiency
# numpy doesn't support tzone
# (internally, always stored from UTC epoch, like POSIXct in R)
if (is.null(tz) && py_module_available("numpy")) {
np_array <- r_to_py_impl(array(as.double(x) * 1E9), convert = FALSE)
return(np_array$astype(dtype = "datetime64[ns]"))
}
if(!is.null(tz)) {
if(py_version() >= "3.9")
tz <- import("zoneinfo", convert = FALSE)$ZoneInfo(tz)
else
tz <- import("pytz")$timezone(tz)
}
datetime <- import("datetime", convert = FALSE)
datetime$datetime$fromtimestamp(as.double(x), tz = tz)
}
#' @export
py_to_r.datetime.datetime <- function(x) {
if (py_version() >= "3") {
tz <- NULL
if (!is.null(x$tzinfo)) {
# in Python 3.9, there is a new zoneinfo.ZoneInfo class that
# accepts Olsonnames, similar to R's tz= semantics.
# Try to find the user supplied value in that case.
# Note that accessing `ZoneInfo.tzname()` is lossy. Eg.
# doing `ZoneInfo("America/New_York").tzname()` returns "EDT", which is
# not in R's OlsonNames() database, and also not stable wrt DST status.
# pytz.zone, round-trips better
if(is.null(tz))
tz <- as_r_value(py_get_attr(x$tzinfo, "zone", TRUE))
# zoneinfo.ZoneInfo(), roundtrip lossy (standardized), but in py stdlib
if (is.null(tz))
tz <- as_r_value(py_get_attr(x$tzinfo, "key", TRUE))
# common to pytz.timezone, ZoneInfo, other subclasses
if (is.null(tz))
tryCatch(tz <- as_r_value(x$tzname()), error = identity)
}
# TODO: if tzname() raised NotImplemented,
# - restore last user facing python exception,
# - have a fallback trying to construct a tz string w/ utcoffset().
return(.POSIXct(as_r_value(x$timestamp()), tz = tz))
}
# old python2 compat code.
# mangles tzinfo attribute: https://github.com/rstudio/reticulate/issues/1265
disable_conversion_scope(x)
# convert to POSIX time
time <- import("time", convert = TRUE)
posix <- time$mktime(x$timetuple())
# include microseconds as well
ms <- as_r_value(x$microsecond)
posix <- posix + as.numeric(ms) * 1E-6
# TODO: handle non-UTC timezones?
as.POSIXct(posix, origin = "1970-01-01", tz = "UTC")
}
#' @export
r_to_py.Date <- function(x, convert = FALSE) {
r_convert_date(x, convert)
}
#' @export
py_to_r.datetime.date <- function(x) {
iso <- as_r_value(x$isoformat())
as.Date.character(iso)
}
#' @export
py_to_r.collections.OrderedDict <- function(x) {
keys <- py_dict_get_keys(x)
py_set_convert(keys, FALSE)
local_conversion_scope(x, TRUE)
result <- lapply(seq_len(length(keys)) - 1L, function(i) {
py_dict_get_item(x, keys[i])
})
names(result) <- py_dict_get_keys_as_str(x)
result
}
#' @export
py_to_r.pandas.core.series.Series <- function(x) {
local_conversion_scope(x, FALSE)
values <- py_to_r(x$values)
index <- py_to_r(x$index)
bt <- import("builtins")
names(values) <- as.character(bt$list(index$map(bt$str)))
values
}
#' @export
py_to_r.pandas.core.categorical.Categorical <- function(x) {
local_conversion_scope(x, FALSE)
values <- py_to_r(x$get_values())
levels <- py_to_r(x$categories$values)
ordered <- py_to_r(x$dtype$ordered)
factor(values, levels = levels, ordered = ordered)
}
#' @export
py_to_r.pandas.core.arrays.categorical.Categorical <-
py_to_r.pandas.core.categorical.Categorical
#' @export
py_to_r.pandas._libs.missing.NAType <- function(x) {
NA
}
#' @export
py_to_r.pandas._libs.missing.C_NAType <- function(x) {
NA
}
py_object_shape <- function(object) {
unlist(as_r_value(object$shape))
}
#' @export
summary.pandas.core.series.Series <- function(object, ...) {
if (py_is_null_xptr(object) || !py_available())
str(object)
else
object$describe()
}
#' @export
length.pandas.core.series.Series <- function(x) {
if (py_is_null_xptr(x) || !py_available())
0L
else {
py_object_shape(x)[[1]]
}
}
#' @export
dim.pandas.core.series.Series <- function(x) {
NULL
}
#' @export
r_to_py.data.frame <- function(x, convert = FALSE) {
# if we don't have pandas, just use default implementation
if (!py_module_available("pandas"))
return(r_to_py_impl(x, convert = convert))
pd <- import("pandas", convert = FALSE)
# manually convert each column to associated Python vector type
columns <- r_convert_dataframe(x, convert = convert)
# generate DataFrame from dictionary
pdf <- pd$DataFrame$from_dict(columns)
# copy over row names if they exist
rni <- .row_names_info(x, type = 0L)
if (is.character(rni)) {
if (length(rni) == 1)
rni <- as.list(rni)
pdf$index <- rni
}
# re-order based on original columns
if (length(x) > 1)
pdf <- pdf$reindex(columns = names(x))
pdf
}
#' @export
py_to_r.datatable.Frame <- function(x) {
# TODO: it would be nice to avoid the extra conversion to pandas
py_to_r(x$to_pandas())
}
#' @export
py_to_r.pandas.core.frame.DataFrame <- function(x) {
local_conversion_scope(x, FALSE)
np <- import("numpy", convert = TRUE)
pandas <- import("pandas", convert = TRUE)
bt <- import("builtins", convert = TRUE)
# delegate to c++
converted <- py_convert_pandas_df(x)
converted <- lapply(converted, function(col) {
if (identical(length(dim(col)), 1L))
dim(col) <- NULL
col
})
# assign colnamnes
columns <- x$columns$values
columns <- as.character(bt$list(x$columns$map(bt$str)))
names(converted) <- columns
df <- converted
attr(df, "row.names") <- c(NA_integer_, py_to_r(-x$shape[[0L]]))
class(df) <- "data.frame"
# attempt to copy over index, and set as rownames when appropriate
#
# TODO: should we tag the R data.frame with the original Python index
# object in case users need it?
#
# TODO: Pandas allows for a large variety of index formats; we should
# try to explicitly whitelist a small family which we can represent
# effectively in R
index <- x$index
# tag the returned object with the Python index, in case
# the user needs to explicitly access / munge the index
# for some need
attr(df, "pandas.index") <- index
if (inherits(index, c("pandas.core.indexes.base.Index",
"pandas.indexes.base.Index"))) {
if (inherits(index, c("pandas.core.indexes.range.RangeIndex",
"pandas.indexes.range.RangeIndex")) &&
np$issubdtype(index$dtype, np$number))
{
# check for a range index from 0 -> n. in such a case, we don't need
# to copy or translate the index. note that we need to translate from
# Python's 0-based indexing to R's one-based indexing.
#
# NOTE: `_start` and friends were deprecated with Pandas 0.25.0,
# with non-private versions preferred for access instead
if (reticulate::py_has_attr(index, "start"))
{
start <- py_to_r(index[["start"]])
stop <- py_to_r(index[["stop"]])
step <- py_to_r(index[["step"]])
}
else
{
start <- py_to_r(index[["_start"]])
stop <- py_to_r(index[["_stop"]])
step <- py_to_r(index[["_step"]])
}
if (start != 0 || stop != nrow(df) || step != 1) {
values <- tryCatch(py_to_r(index$values), error = identity)
if (is.numeric(values)) {
rownames(df) <- values + 1
}
}
}
else if (inherits(index, c("pandas.core.indexes.datetimes.DatetimeIndex",
"pandas.tseries.index.DatetimeIndex"))) {
converted <- tryCatch(py_to_r(index$values), error = identity)
tz <- index[["tz"]]
if (inherits(tz, "pytz.tzinfo.BaseTzInfo") ||
inherits(tz, "pytz.UTC"))
{
zone <- tryCatch(py_to_r(tz$zone), error = function(e) NULL)
if (!is.null(zone) && zone %in% OlsonNames())
attr(converted, "tzone") <- zone
}
rownames(df) <- converted
}
else {
converted <- tryCatch(py_to_r(index$values), error = identity)
if (is.character(converted) || is.numeric(converted)) {
if (any(duplicated(converted))) {
warning("index contains duplicated values: row names not set")
} else {
rownames(df) <- converted
}
}
}
}
df
}
#' @export
summary.pandas.core.frame.DataFrame <- summary.pandas.core.series.Series
#' @export
length.pandas.core.frame.DataFrame <- function(x) {
if (py_is_null_xptr(x) || !py_available())
0L
else {
py_object_shape(x)[[2]]
}
}
#' @export
dim.pandas.core.frame.DataFrame <- function(x) {
if (py_is_null_xptr(x) || !py_available())
NULL
else
py_object_shape(x)
}
# Scipy sparse matrices
#' @importFrom Matrix Matrix
#' @export
dim.scipy.sparse.base.spmatrix <- function(x) {
if (py_is_null_xptr(x) || !py_available())
NULL
else
py_object_shape(x)
}
#' @export
length.scipy.sparse.base.spmatrix <- function(x) {
if (py_is_null_xptr(x) || !py_available())
0L
else
prod(py_object_shape(x))
}
#' @export
py_to_r.scipy.sparse.base.spmatrix <- function(x) {
py_to_r(x$tocsc())
}
#' @importFrom methods as
#' @export
r_to_py.sparseMatrix <- function(x, convert = FALSE) {
x <- if (package_version(as.vector(getNamespaceVersion("Matrix"))) >= "1.4-2")
as(as(as(x, "dMatrix"), "generalMatrix"), "CsparseMatrix")
else
as(x, "dgCMatrix")
r_to_py(x, convert = convert)
}
# Conversion between `Matrix::dgCMatrix` and `scipy.sparse.csc.csc_matrix`.
# Scipy CSC Matrix: https://docs.scipy.org/doc/scipy/reference/generated/scipy.sparse.csc_matrix.html
#' @export
r_to_py.dgCMatrix <- function(x, convert = FALSE) {
# use default implementation if scipy is not available
if (!py_module_available("scipy"))
return(r_to_py_impl(x, convert = convert))
sp <- import("scipy.sparse", convert = FALSE)
csc_x <- sp$csc_matrix(
tuple(
np_array(x@x), # Data array of the matrix
np_array(x@i), # CSC format index array
np_array(x@p), # CSC format index pointer array
convert = FALSE
),
shape = dim(x)
)
if (any(dim(x) != dim(csc_x)))
stop(
paste(
"Failed to convert: dimensions of the original Matrix::dgCMatrix ",
"object (", dim(x), ") and the converted Scipy CSC matrix (",
dim(csc_x), ") do not match", sep="", collapse=", "))
csc_x
}
#' @importFrom methods new
#' @export
py_to_r.scipy.sparse.csc.csc_matrix <- function(x) {
disable_conversion_scope(x)
x <- x$sorted_indices()
new(
"dgCMatrix",
i = as.integer(as_r_value(x$indices)),
p = as.integer(as_r_value(x$indptr)),
x = as.vector(as_r_value(x$data)),
Dim = as.integer(dim(x))
)
}
# Conversion between `Matrix::dgRMatrix` and `scipy.sparse.csr.csr_matrix`.
# Scipy CSR Matrix: https://docs.scipy.org/doc/scipy/reference/generated/scipy.sparse.csr_matrix.html
#' @export
r_to_py.dgRMatrix <- function(x, convert = FALSE) {
# use default implementation if scipy is not available
if (!py_module_available("scipy"))
return(r_to_py_impl(x, convert = convert))
sp <- import("scipy.sparse", convert = FALSE)
csr_x <- sp$csr_matrix(
tuple(
np_array(x@x), # Data array of the matrix
np_array(x@j), # CSR format index array
np_array(x@p), # CSR format index pointer array
convert = FALSE
),
shape = dim(x)
)
if (any(dim(x) != dim(csr_x)))
stop(
paste(
"Failed to convert: dimensions of the original Matrix::dgRMatrix ",
"object (", dim(x), ") and the converted Scipy CSR matrix (",
dim(csr_x), ") do not match", sep="", collapse=", "))
csr_x
}
#' @export
py_to_r.scipy.sparse.csr.csr_matrix <- function(x) {
disable_conversion_scope(x)
x <- x$sorted_indices()
new(
"dgRMatrix",
j = as.integer(as_r_value(x$indices)),
p = as.integer(as_r_value(x$indptr)),
x = as.vector(as_r_value(x$data)),
Dim = as.integer(dim(x))
)
}
# Conversion between `Matrix::dgTMatrix` and `scipy.sparse.coo.coo_matrix`.
# Scipy COO Matrix: https://docs.scipy.org/doc/scipy/reference/generated/scipy.sparse.coo_matrix.html
#' @export
r_to_py.dgTMatrix <- function(x, convert = FALSE) {
# use default implementation if scipy is not available
if (!py_module_available("scipy"))
return(r_to_py_impl(x, convert = convert))
sp <- import("scipy.sparse", convert = FALSE)
coo_x <- sp$coo_matrix(
tuple(
np_array(x@x), # Data array of the matrix
tuple(
np_array(x@i),
np_array(x@j),
convert = FALSE
),
convert = FALSE
),
shape = dim(x)
)
if (any(dim(x) != dim(coo_x)))
stop(
paste(
"Failed to convert: dimensions of the original Matrix::dgTMatrix ",
"object (", dim(x), ") and the converted Scipy COO matrix (",
dim(coo_x), ") do not match", sep="", collapse=", "))
coo_x
}
#' @importFrom methods new
#' @export
py_to_r.scipy.sparse.coo.coo_matrix <- function(x) {
disable_conversion_scope(x)
new(
"dgTMatrix",
i = as.integer(as_r_value(x$row)),
j = as.integer(as_r_value(x$col)),
x = as.vector(as_r_value(x$data)),
Dim = as.integer(dim(x))
)
}
r_convert_dataframe_column <- function(column, convert) {
pd <- import("pandas", convert = FALSE)
if (is.factor(column)) {
pd$Categorical(as.list(as.character(column)),
categories = as.list(levels(column)),
ordered = inherits(column, "ordered"))
} else if (is.numeric(column) || is.character(column)) {
np_array(column)
} else if (inherits(column, "POSIXt")) {
np_array(as.numeric(column) * 1E9, dtype = "datetime64[ns]")
} else {
r_to_py(column)
}
}
# workaround for deprecation of packages in scipy 1.8.0
#' @export
dim.scipy.sparse._base.spmatrix <- dim.scipy.sparse.base.spmatrix
#' @export
length.scipy.sparse._base.spmatrix <- length.scipy.sparse.base.spmatrix
#' @export
py_to_r.scipy.sparse._base.spmatrix <- py_to_r.scipy.sparse.base.spmatrix
#' @export
py_to_r.scipy.sparse._csc.csc_matrix <- py_to_r.scipy.sparse.csc.csc_matrix
#' @export
py_to_r.scipy.sparse._csr.csr_matrix <- py_to_r.scipy.sparse.csr.csr_matrix
#' @export
py_to_r.scipy.sparse._coo.coo_matrix <- py_to_r.scipy.sparse.coo.coo_matrix
# updated locations for scipy 1.11.0
#' @export
dim.scipy.sparse._base._spbase <- dim.scipy.sparse.base.spmatrix
#' @export
length.scipy.sparse._base._spbase <- length.scipy.sparse.base.spmatrix
#' @export
py_to_r.scipy.sparse._matrix.spmatrix <- py_to_r.scipy.sparse.base.spmatrix
#' @export
as.data.frame.polars.dataframe.frame.DataFrame <-
function(x, row.names = NULL, optional = FALSE, ...) {
py_set_convert(x$to_pandas, TRUE)()
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.