# Global knitr chunk options for this vignette: collapse source and output
# into a single block and prefix printed output with "#>".
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
The cloudosR package provides an R client for interacting with the Lifebit Platform. It enables you to:
# Installation options -- pick ONE of the following.

# Install from CRAN (recommended)
install.packages("cloudosR")

# Or install development version from GitHub
devtools::install_github("lifebit-ai/cloudosR")

# Or install from local source
install.packages("path/to/cloudosR", repos = NULL, type = "source")
Before using any API functions, you need to configure a profile with your Lifebit Platform credentials.
library(cloudosR)

# Configure a profile with your credentials
cloudos.configure(
  profilename = "production",
  apikey = "your-api-key-here",
  workspace_id = "your-workspace-id",
  base_url = "https://cloudos.lifebit.ai",  # Optional, this is the default
  set_default = TRUE                        # Set as default profile
)
The configuration is stored securely in your R user config directory with restricted permissions (0600). You can find the location with `tools::R_user_dir("cloudosR", "config")`.
When you set `set_default = TRUE`, you don't need to specify the `profilename` parameter in subsequent function calls:
# With default profile set, you can omit profilename
results <- cloudos.query(
  cohort_id = "1a2b3c4d5e6f7g8h9i10j11k",
  sql = "SELECT * FROM table LIMIT 10"
)
You can configure multiple profiles for different environments:
# Production profile
cloudos.configure(
  profilename = "production",
  apikey = "prod-api-key",
  workspace_id = "prod-workspace-id",
  set_default = TRUE
)

# Staging profile
cloudos.configure(
  profilename = "staging",
  apikey = "staging-api-key",
  workspace_id = "staging-workspace-id"
)

# List all configured profiles
profiles <- cloudos.profile_list()
print(profiles)
The package provides both high-level and low-level functions for executing SQL queries.
The cloudos.query() function handles the entire query lifecycle automatically:
# Simple query (uses default profile)
results <- cloudos.query(
  cohort_id = "1a2b3c4d5e6f7g8h9i10j11k",
  sql = "SELECT person_id, gender_concept_id, birth_datetime FROM person LIMIT 10"
)

# View results
head(results)
str(results)

# Access metadata
attr(results, "total_rows")
attr(results, "total_pages")
For more control over the query process, you can use the individual steps:
# Step 1: Submit query with pagination
task <- cloudos.query_submit_async(
  profilename = "production",
  cohort_id = "1a2b3c4d5e6f7g8h9i10j11k",
  sql = "SELECT * FROM table LIMIT 10",
  pagination = list(pageNumber = 0, pageSize = 100)  # Optional
)
task_id <- task$task_id
print(task_id)

# Step 2: Check status
status <- cloudos.query_status(
  profilename = "production",
  task_id = task_id
)
print(status$status)  # "pending", "running", "completed", or "failed"
print(status$count_of_results)

# Step 3: Fetch results when completed
results <- cloudos.query_results(
  profilename = "production",
  task_id = task_id
)
The package automatically handles pagination by submitting multiple async tasks:
# Fetch all pages automatically (default)
# Note: This submits separate tasks for each page
all_results <- cloudos.query(
  cohort_id = "1a2b3c4d5e6f7g8h9i10j11k",
  sql = "SELECT * FROM person",
  page_size = 1000,  # Rows per page (default: 1000)
  all_pages = TRUE   # Fetch all pages (default: TRUE)
)

# This will:
# 1. Submit query for page 0
# 2. Wait for completion and fetch results
# 3. Calculate total pages from response metadata
# 4. Submit separate tasks for pages 1, 2, 3...
# 5. Wait for all tasks to complete
# 6. Combine and return all results

# Fetch only first page
first_page <- cloudos.query(
  cohort_id = "1a2b3c4d5e6f7g8h9i10j11k",
  sql = "SELECT * FROM person",
  page_size = 100,
  all_pages = FALSE  # Only fetch first page
)
You can customize how the package polls for query completion:
# Tune how often the query status is polled and how long to wait overall.
results <- cloudos.query(
  cohort_id = "1a2b3c4d5e6f7g8h9i10j11k",
  sql = "SELECT COUNT(*) FROM person",
  poll_interval = 5,  # Check every 5 seconds (default: 2)
  max_wait = 900      # Wait up to 15 minutes (default: 600)
)
The package supports any valid SQL query that your cohort allows:
# Join multiple tables
sql_query <- "
  SELECT
    p.person_id,
    p.gender_concept_id,
    c.condition_concept_id,
    c.condition_start_date
  FROM person p
  JOIN condition_occurrence c ON p.person_id = c.person_id
  WHERE p.birth_datetime > '1980-01-01'
  LIMIT 1000
"

results <- cloudos.query(
  cohort_id = "1a2b3c4d5e6f7g8h9i10j11k",
  sql = sql_query
)
Add `.cloudos_config.json` to your `.gitignore` file.

The package provides informative error messages:
# Authentication errors
tryCatch(
  {
    cloudos.query(
      profilename = "invalid_profile",
      cohort_id = "123",
      sql = "SELECT 1"
    )
  },
  error = function(e) {
    # conditionMessage() is the supported accessor for a condition's text
    message("Error caught: ", conditionMessage(e))
  }
)

# Query timeout
tryCatch(
  {
    cloudos.query(
      cohort_id = "123",
      sql = "SELECT * FROM large_table",
      max_wait = 5  # Very short timeout
    )
  },
  error = function(e) {
    message("Query timed out: ", conditionMessage(e))
  }
)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.