Examples

Reading table data

import redivis

table = (
    redivis
    .organization("Demo")
    .dataset("CMS 2014 Medicare Data")
    .table("Hospice providers")
)

# We can specify an optional max_results argument to only load some of the records
for row in table.list_rows(max_results=100):
    print(row)

# Max results can also be the first positional argument
df = table.to_dataframe(100)
print(df)
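The result of to_dataframe is a standard pandas DataFrame, so the usual pandas tooling applies once the data is loaded. A minimal sketch (the "state" variable referenced below is assumed for illustration and may not exist on this particular table):

import redivis

table = (
    redivis
    .organization("Demo")
    .dataset("CMS 2014 Medicare Data")
    .table("Hospice providers")
)

df = table.to_dataframe(1000)

# Ordinary pandas operations work on the returned frame.
# NOTE: "state" is an assumed variable name, used for illustration only.
print(df.columns)
print(df["state"].value_counts())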

Querying data

Execute a query

import redivis

# Perform a query on the Demo CMS Medicare data. Table at https://redivis.com/demo/datasets/1754/tables
query = redivis.query("""
    SELECT * FROM demo.cms_2014_medicare_data.home_health_agencies
    WHERE state = 'CA'
""")

for row in query.list_rows():
    print(row["agency_name"])

# We can also use data frames
df = query.to_dataframe()
print(df)
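Because the query is just a SQL string, it can be assembled with ordinary Python string formatting before being passed to redivis.query. A minimal sketch (only interpolate trusted values; this performs no escaping or parameter binding):

import redivis

state = "CA"

# Build the SQL with an f-string. Only interpolate trusted values,
# since this sketch performs no escaping.
query = redivis.query(f"""
    SELECT agency_name, state
    FROM demo.cms_2014_medicare_data.home_health_agencies
    WHERE state = '{state}'
""")

for row in query.list_rows():
    print(row["agency_name"])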

Execute a scoped query

import redivis

# Perform a query on the Demo CMS Medicare data.
# Table at https://redivis.com/demo/datasets/1754/tables

# We don't need to include fully-qualified table names
# if we scope our query to the appropriate dataset or project

query = (
    redivis
    .organization("Demo")
    .dataset("CMS 2014 Medicare Data")
    .query("""
        SELECT provider_name, average_total_payments
        FROM nursing_facilities
        INNER JOIN outpatient_charges USING (provider_id)
        WHERE state = 'CA'
    """)
)

for row in query.list_rows():
    print(row.provider_name)

# We can also use data frames
df = query.to_dataframe()
print(df)
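Queries can be scoped to a project in the same way, so tables within the project can be referenced by their bare names. A minimal sketch, assuming a hypothetical user, project, and table name (substitute your own):

import redivis

# "your-username", "my_project", and "some_output_table" are hypothetical
# names; scoping to the project lets us reference its tables directly.
query = (
    redivis
    .user("your-username")
    .project("my_project")
    .query("""
        SELECT COUNT(*) AS n
        FROM some_output_table
    """)
)

for row in query.list_rows():
    print(row.n)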

Uploading data

Create a new dataset

import redivis

# Could also create a dataset under an organization:
# dataset = redivis.organization("Demo organization").dataset("some dataset")
dataset = redivis.user("your-username").dataset("some dataset")

# public_access_level can be one of ('none', 'overview', 'metadata', 'sample', 'data')
dataset.create(public_access_level="overview")

Create a table and upload data

import redivis

dataset = redivis.user("user_name").dataset("dataset_name", version="next")

# Create a table on the dataset. Datasets may have multiple tables
table = (
    dataset
    .table("Table name")
    .create(description="Some description")
)

# Upload a file to the table.
# You can create multiple uploads per table, in which case they'll be appended together.
upload = table.upload("data.csv")

with open("data.csv", "rb") as file:
    upload.create(
        file,
        type="delimited",
        remove_on_fail=True,   # Remove the upload if a failure occurs
        wait_for_finish=True,  # Wait for the upload to finish processing
        raise_on_fail=True     # Raise an error on failure
    )
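Since uploads on the same table are appended together, a dataset split across several files can be ingested by creating one upload per file. A minimal sketch using the same API as above (the part_*.csv file names are hypothetical):

import redivis

dataset = redivis.user("user_name").dataset("dataset_name", version="next")
table = dataset.table("Table name")

# One upload per file; all uploads are appended together in the table.
for path in ["part_1.csv", "part_2.csv"]:
    upload = table.upload(path)
    with open(path, "rb") as file:
        upload.create(
            file,
            type="delimited",
            wait_for_finish=True,
            raise_on_fail=True,
        )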

Stream data to an upload

import redivis

dataset = redivis.user("user_name").dataset("dataset_name", version="next")
table = dataset.table("table_name")

# schema is optional if update_schema is set to True on the insert_rows request
schema = [
    {"name": "var1", "type": "string"},
    {"name": "var2", "type": "integer"},
    {"name": "var3", "type": "dateTime"},
]

rows = [
    {"var1": "hello", "var2": 1, "var3": None},
    # dateTime must be in the format YYYY-MM-DD[ |T]HH:MM:SS[.ssssss]
    {"var1": "world", "var2": 2, "var3": "2020-01-01T00:00:00.123"},
]

# Reference each upload with its name, which must be unique amongst other uploads
# for the current version of this table.
upload = table.upload(name="some_streamed_data")

# Only call create if the upload doesn't already exist
upload.create(
    type="stream",
    # schema is optional if update_schema is set to True on insert_rows
    schema=schema,
    # If True, will only create the upload if an upload with this name doesn't already exist.
    # Otherwise, a counter will be added to the name to preserve name uniqueness.
    if_not_exists=False,
    # If skip_bad_records is True, ignore records that are incompatible with the existing schema.
    # This has no effect when update_schema is set to True on the insert_rows request.
    skip_bad_records=False,  # Optional, default is False
)

insert_response = upload.insert_rows(
    rows,
    # If update_schema is set to True, variables can be added by subsequent streams,
    # and variable types will be relaxed if new values are incompatible with the previous type.
    # If False, an error will be thrown if a row would cause a schema update,
    # unless skip_bad_records is set to True on the upload (in which case they'll be ignored)
    update_schema=False,
)

# See REST API / uploads / insertRows
print(insert_response)
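insert_rows can be called repeatedly on the same streaming upload, so a large insert can be chunked client-side rather than sent in a single request. A minimal sketch, assuming the "some_streamed_data" upload was already created with type="stream" as shown above:

import redivis

dataset = redivis.user("user_name").dataset("dataset_name", version="next")
table = dataset.table("table_name")

# Assumes this upload was already created with type="stream".
upload = table.upload(name="some_streamed_data")

rows = [{"var1": f"value_{i}", "var2": i, "var3": None} for i in range(10000)]

# Stream the rows in fixed-size batches; each call appends to the same upload.
batch_size = 500
for start in range(0, len(rows), batch_size):
    upload.insert_rows(rows[start:start + batch_size], update_schema=False)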

Release a new version

import redivis

dataset = redivis.organization("Demo").dataset("some dataset")
dataset.release()

Create a subsequent version on an existing dataset

import redivis

dataset = redivis.user("your-username").dataset("some dataset")

# dataset.create_next_version will throw an error if a "next" version already exists,
# unless the ignore_if_exists argument is provided
dataset = dataset.create_next_version(ignore_if_exists=True)
table = dataset.table("table name")

# By default, all new data is appended to the previous version of a table.
# If you'd like to replace the previous data, update the upload_merge_strategy.
table.update(upload_merge_strategy="replace")

upload = table.upload("data.csv")
with open("data.csv", "rb") as file:
    upload.create(
        file,
        # All additional params are optional; default values are shown here
        type="delimited",       # One of stream, delimited, csv, ndjson, avro, parquet, orc, xls, xlsx, dta, sas7bdat, sav
        skip_bad_records=False,
        has_header_row=True,    # Only relevant for csv, xls(x)
        remove_on_fail=True,    # Remove the upload if a failure occurs
        wait_for_finish=True,   # Wait for the upload to finish processing
        raise_on_fail=True,     # Raise an error on failure

        # The following are only relevant for delimited files:
        allow_quoted_newlines=False,  # Allow newlines within cells. Setting to True will substantially reduce ingest performance.
        quote_character='"',          # The character used to escape delimiters within cells. Generally a double quote in compliant CSVs.
        delimiter=None,               # Explicitly set the delimiter; otherwise it will be automatically inferred.
    )

# When all uploads have finished, release the next version
dataset.release()