June 2, 2025
DuckLake is a new integrated data lake and catalog format from the DuckDB team. It uses a SQL database for all catalog and table metadata, and stores data in Apache Parquet files. Most users interact with DuckLake using SQL, but for those who prefer dataframes there’s Ibis—a portable Python dataframe library with support for DuckDB as a backend. To get started using DuckLake with Ibis, follow the steps below.
Create a new project with uv:
uv init --bare ducklake-ibis
cd ducklake-ibis
Install DuckDB, Ibis and JupyterLab:
uv add duckdb 'ibis-framework[duckdb,examples]' jupyterlab
Launch JupyterLab:
uv run jupyter lab
Open localhost:8888
in your browser, create a new Python notebook, then incrementally add and run the code below:
import ibis
ibis.options.interactive = True
con = ibis.duckdb.connect(extensions="ducklake")
con.attach("ducklake:my_ducklake.ducklake")
con.list_catalogs()
['__ducklake_metadata_my_ducklake', 'memory', 'my_ducklake', 'system', 'temp']
con.raw_sql("USE my_ducklake")
<duckdb.duckdb.DuckDBPyConnection at 0x1068e1e30>
con.create_table(
"penguins",
ibis.examples.penguins.fetch().to_pyarrow(),
overwrite=True
)
DatabaseTable: my_ducklake.main.penguins
species string
island string
bill_length_mm float64
bill_depth_mm float64
flipper_length_mm int64
body_mass_g int64
sex string
year int64
con.list_tables()
['penguins']
penguins = con.table("penguins")
penguins
┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓
┃ species ┃ island ┃ bill_length_mm ┃ bill_depth_mm ┃ flipper_length_mm ┃ body_mass_g ┃ sex ┃ year ┃
┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩
│ string │ string │ float64 │ float64 │ int64 │ int64 │ string │ int64 │
├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼───────┤
│ Adelie │ Torgersen │ 39.1 │ 18.7 │ 181 │ 3750 │ male │ 2007 │
│ Adelie │ Torgersen │ 39.5 │ 17.4 │ 186 │ 3800 │ female │ 2007 │
│ Adelie │ Torgersen │ 40.3 │ 18.0 │ 195 │ 3250 │ female │ 2007 │
│ Adelie │ Torgersen │ NULL │ NULL │ NULL │ NULL │ NULL │ 2007 │
│ Adelie │ Torgersen │ 36.7 │ 19.3 │ 193 │ 3450 │ female │ 2007 │
│ Adelie │ Torgersen │ 39.3 │ 20.6 │ 190 │ 3650 │ male │ 2007 │
│ Adelie │ Torgersen │ 38.9 │ 17.8 │ 181 │ 3625 │ female │ 2007 │
│ Adelie │ Torgersen │ 39.2 │ 19.6 │ 195 │ 4675 │ male │ 2007 │
│ Adelie │ Torgersen │ 34.1 │ 18.1 │ 193 │ 3475 │ NULL │ 2007 │
│ Adelie │ Torgersen │ 42.0 │ 20.2 │ 190 │ 4250 │ NULL │ 2007 │
│ … │ … │ … │ … │ … │ … │ … │ … │
└─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘
penguins.group_by("species").count()
┏━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┓
┃ species ┃ CountStar(penguins) ┃
┡━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━┩
│ string │ int64 │
├───────────┼─────────────────────┤
│ Gentoo │ 124 │
│ Adelie │ 152 │
│ Chinstrap │ 68 │
└───────────┴─────────────────────┘
con.raw_sql("USE __ducklake_metadata_my_ducklake")
<duckdb.duckdb.DuckDBPyConnection at 0x1068e1e30>
con.list_tables()
['ducklake_column',
'ducklake_column_tag',
'ducklake_data_file',
'ducklake_delete_file',
'ducklake_file_column_statistics',
'ducklake_file_partition_value',
'ducklake_files_scheduled_for_deletion',
'ducklake_inlined_data_tables',
'ducklake_metadata',
'ducklake_partition_column',
'ducklake_partition_info',
'ducklake_schema',
'ducklake_snapshot',
'ducklake_snapshot_changes',
'ducklake_table',
'ducklake_table_column_stats',
'ducklake_table_stats',
'ducklake_tag',
'ducklake_view']
con.table("ducklake_table_column_stats")
┏━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━┓
┃ table_id ┃ column_id ┃ contains_null ┃ contains_nan ┃ min_value ┃ max_value ┃
┡━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━┩
│ int64 │ int64 │ boolean │ boolean │ string │ string │
├──────────┼───────────┼───────────────┼──────────────┼───────────┼───────────┤
│ 1 │ 1 │ False │ NULL │ Adelie │ Gentoo │
│ 1 │ 2 │ False │ NULL │ Biscoe │ Torgersen │
│ 1 │ 3 │ True │ False │ 32.1 │ 59.6 │
│ 1 │ 4 │ True │ False │ 13.1 │ 21.5 │
│ 1 │ 5 │ True │ NULL │ 172 │ 231 │
│ 1 │ 6 │ True │ NULL │ 2700 │ 6300 │
│ 1 │ 7 │ True │ NULL │ female │ male │
│ 1 │ 8 │ False │ NULL │ 2007 │ 2009 │
└──────────┴───────────┴───────────────┴──────────────┴───────────┴───────────┘
con.disconnect()