# Source code for tstore.archive.ts.readers.polars
#!/usr/bin/env python3
"""
Created on Mon Jun 12 22:19:51 2023.
@author: ghiggi
"""
import polars as pl
# [docs]
def open_ts(
    fpath,
    partitions,
    start_time=None,  # noqa: ARG001
    end_time=None,  # noqa: ARG001
    # Options
    rechunk=True,
    use_statistics=True,
    hive_partitioning=True,
    storage_options=None,
    low_memory=False,
    # lazy option
    lazy=True,
    # column selection (applied in both lazy and in-memory mode)
    columns=None,
    # in-memory only options
    use_pyarrow=False,
    parallel=True,
):
    """Open a TS into a polars Frame.

    Parameters
    ----------
    fpath :
        Source forwarded as-is to ``pl.scan_parquet`` / ``pl.read_parquet``.
    partitions :
        Name(s) of the partitioning columns. They are dropped from the
        returned frame.
    start_time, end_time :
        Currently unused: time filtering is not implemented yet (see TODO
        in the body).
    rechunk, use_statistics, hive_partitioning, storage_options, low_memory :
        Forwarded unchanged to the polars parquet reader.
    lazy : bool
        If True, build the frame with ``pl.scan_parquet``; otherwise read it
        with ``pl.read_parquet``.
    columns : list of str, optional
        Columns to keep. In in-memory mode the value is forwarded to
        ``pl.read_parquet``. In lazy mode the selection is applied on the
        scanned frame after the partition columns are removed; it is only
        applied when every entry is a column name (other values, e.g.
        integer indices, leave the lazy frame unprojected).
    use_pyarrow :
        Forwarded to ``pl.read_parquet`` (in-memory mode only).
    parallel : bool or str
        Parallelisation strategy for ``pl.read_parquet`` (in-memory mode
        only). polars documents this option as a string; for backward
        compatibility ``True`` is translated to ``"auto"`` and ``False`` to
        ``"none"``. Strings are forwarded unchanged.

    Returns
    -------
    The object returned by ``pl.scan_parquet`` (``lazy=True``) or
    ``pl.read_parquet`` (``lazy=False``), without the partition columns.
    """
    # TODO: can we efficiently filter at parquet read time? see https://github.com/pola-rs/polars/issues/3964
    # https://docs.pola.rs/py-polars/html/reference/api/polars.read_parquet.html
    # https://docs.pola.rs/py-polars/html/reference/api/polars.scan_parquet.html
    if lazy:
        df_pl = pl.scan_parquet(
            fpath,
            rechunk=rechunk,
            storage_options=storage_options,
            hive_partitioning=hive_partitioning,
            use_statistics=use_statistics,
            low_memory=low_memory,
        )
    else:
        # The signature historically exposes ``parallel`` as a boolean, while
        # polars expects a strategy name: translate booleans, keep strings.
        if isinstance(parallel, bool):
            parallel = "auto" if parallel else "none"
        df_pl = pl.read_parquet(
            fpath,
            rechunk=rechunk,
            storage_options=storage_options,
            hive_partitioning=hive_partitioning,
            use_statistics=use_statistics,
            low_memory=low_memory,
            # Others
            columns=columns,
            use_pyarrow=use_pyarrow,
            parallel=parallel,
        )

    # Filter by start_time and end_time
    # - TODO

    # Remove partitioning columns
    df_pl = df_pl.drop(partitions)

    # Filter columns if not None (lazy mode; the in-memory reader already
    # received ``columns``). Done after the drop so that the partition
    # columns never need to be part of the requested selection.
    if lazy and columns is not None and all(isinstance(name, str) for name in columns):
        dropped = {partitions} if isinstance(partitions, str) else set(partitions)
        df_pl = df_pl.select([name for name in columns if name not in dropped])

    return df_pl