Querying data

Execute a query

import redivis

# Execute any SQL query and read the results
query = redivis.query("SELECT 1 + 1 AS two, 'foo' AS bar")
query.to_pandas_dataframe()
# 	two	bar
# 0	2	foo

# The query can reference any table on Redivis 
query = redivis.query("""
    SELECT * 
    FROM demo.iris_species.iris 
    WHERE SepalLengthCm > 5
""")
query.to_pandas_dataframe()
# 	Id	SepalLengthCm	SepalWidthCm	PetalLengthCm	PetalWidthCm	Species
# 0	33	5.2	        4.1	        1.5	        0.1	        Iris-setosa
# ...

# Other methods to read data:
# query.to_arrow_batch_iterator()
# query.to_arrow_dataset()
# query.to_arrow_dataset()
# query.to_geopandas_dataframe()
# query.to_dask_dataframe()
# query.to_polars_lazyframe()

Execute a scoped query

import redivis

# Perform a query on the Demo CMS Medicare data. 
# Table at https://redivis.com/datasets/349j-7phs91amz/tables

# To simplify table references, execute a query scoped to a dataset or workflow
dataset = redivis.organization("Demo").dataset("CMS 2014 Medicare Data")
query = dataset.query("""
    SELECT 
        hospice_providers.name, 
        inpatient_charges.drg_definition
    -- The tables inpatient_chargers, hospice_providers are assumed to be 
    -- within the scoped dataset
    FROM inpatient_charges
    INNER JOIN hospice_providers 
        ON hospice_providers.provider_id = inpatient_charges.provider_id
""")

Run a query within a Redivis notebook

import redivis

# In a notebook, all queries are scoped to the current workflow.
# Additionally, the notebooks source table can simply be referenced as _source_
query = redivis.query("SELECT * FROM _source_ LIMIT 10")

Last updated