Source code for tstore.archive.ts.readers.polars

#!/usr/bin/env python3
"""
Created on Mon Jun 12 22:19:51 2023.

@author: ghiggi
"""

import polars as pl



[docs]
def open_ts(
    fpath,
    partitions,
    start_time=None,  # noqa: ARG001
    end_time=None,  # noqa: ARG001
    # Options
    rechunk=True,
    use_statistics=True,
    hive_partitioning=True,
    storage_options=None,
    low_memory=False,
    # lazy option
    lazy=True,
    # in-memory only options
    columns=None,
    use_pyarrow=False,
    parallel=True,
):
    """Open a TS stored as parquet into a polars frame.

    Parameters
    ----------
    fpath :
        Parquet source, forwarded as first argument to ``pl.scan_parquet``
        (``lazy=True``) or ``pl.read_parquet`` (``lazy=False``).
    partitions :
        Column(s) dropped from the frame before it is returned.
    start_time, end_time :
        Currently unused: time filtering is not implemented yet.
    rechunk, use_statistics, hive_partitioning, storage_options, low_memory :
        Forwarded unchanged to the polars parquet reader in both modes.
    lazy : bool
        If True, build the frame with ``pl.scan_parquet``; otherwise read it
        with ``pl.read_parquet``.
    columns, use_pyarrow :
        Forwarded to ``pl.read_parquet`` only. They are ignored when
        ``lazy=True``.
    parallel : bool or str
        Forwarded to ``pl.read_parquet`` only. polars expects a strategy
        name here, so ``True`` is translated to ``"auto"`` and ``False`` to
        ``"none"``; strings are passed through untouched.

    Returns
    -------
    The object returned by the polars reader, with ``partitions`` dropped.
    """
    # TODO: can we efficiently filter at parquet read time? see https://github.com/pola-rs/polars/issues/3964
    # https://docs.pola.rs/py-polars/html/reference/api/polars.read_parquet.html
    # https://docs.pola.rs/py-polars/html/reference/api/polars.scan_parquet.html

    # Options understood by both the lazy and the in-memory reader
    reader_kwargs = {
        "rechunk": rechunk,
        "storage_options": storage_options,
        "hive_partitioning": hive_partitioning,
        "use_statistics": use_statistics,
        "low_memory": low_memory,
    }

    if lazy:
        df_pl = pl.scan_parquet(fpath, **reader_kwargs)
        # TODO: `columns` is not applied in lazy mode yet
    else:
        # The signature default is a bool, but polars takes a strategy string:
        # translate so that the default call does not pass an invalid value.
        if isinstance(parallel, bool):
            parallel = "auto" if parallel else "none"
        df_pl = pl.read_parquet(
            fpath,
            **reader_kwargs,
            # Others
            columns=columns,
            use_pyarrow=use_pyarrow,
            parallel=parallel,
        )

    # Filter by start_time and end_time
    # - TODO

    # Remove partitioning columns
    df_pl = df_pl.drop(partitions)

    return df_pl