Querying data
Execute a query
import redivis
# Execute any SQL query and read the results as a pandas DataFrame
query = redivis.query("SELECT 1 + 1 AS two, 'foo' AS bar")
query.to_pandas_dataframe()
# Output:
#   two  bar
# 0   2  foo
# The query can reference any table on Redivis
# (here the table is referenced as demo.iris_species.iris)
query = redivis.query("""
SELECT *
FROM demo.iris_species.iris
WHERE SepalLengthCm > 5
""")
query.to_pandas_dataframe()
# Output:
#    Id  SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm      Species
# 0  33            5.2           4.1            1.5           0.1  Iris-setosa
# ...
# Other methods to read data:
# query.to_arrow_batch_iterator()
# query.to_arrow_dataset()
# query.to_arrow_table()
# query.to_geopandas_dataframe()
# query.to_dask_dataframe()
# query.to_polars_lazyframe()
Execute a scoped query
import redivis
# Perform a query on the Demo CMS Medicare data.
# Table at https://redivis.com/datasets/349j-7phs91amz/tables
# To simplify table references, execute a query scoped to a dataset or workflow;
# tables within that scope can then be referenced by name alone.
dataset = redivis.organization("Demo").dataset("CMS 2014 Medicare Data")
query = dataset.query("""
SELECT
hospice_providers.name,
inpatient_charges.drg_definition
-- The tables inpatient_charges, hospice_providers are assumed to be
-- within the scoped dataset
FROM inpatient_charges
INNER JOIN hospice_providers
ON hospice_providers.provider_id = inpatient_charges.provider_id
""")
Run a query within a Redivis notebook
import redivis
# In a notebook, all queries are scoped to the current workflow.
# Additionally, the notebook's source table can simply be referenced as _source_
query = redivis.query("SELECT * FROM _source_ LIMIT 10")
Last updated