# Source code for tstore.archive.ts.readers.pyarrow

#!/usr/bin/env python3
"""
Created on Mon Jun 12 22:19:51 2023.

@author: ghiggi
"""

import pyarrow.parquet as pq

from tstore.archive.ts.utility import get_time_filters



# [docs]
def open_ts(
    fpath,
    partitions,
    start_time=None,
    end_time=None,
    inclusive=None,
    columns=None,
    split_row_groups=False,  # noqa: ARG001
    # pyarrow-specific
    filesystem=None,
    use_threads=True,
):
    """Open a TS into a pyarrow.Table.

    Parameters
    ----------
    fpath
        Location of the Parquet data, forwarded as-is to ``pq.read_table``.
    partitions : str, list of str or None
        Name(s) of the partitioning columns to remove from the returned table.
        Names that are not present in the table that was read (for example
        because ``columns`` did not select them) are ignored.
        ``None`` means that no column is removed.
    start_time, end_time, inclusive
        Forwarded to ``get_time_filters`` to build the ``filters`` argument
        of ``pq.read_table``.
    columns : list of str, optional
        Forwarded to ``pq.read_table`` as ``columns``.
    split_row_groups : bool, optional
        Currently unused by this reader; accepted and ignored.
    filesystem : optional
        Forwarded to ``pq.read_table`` as ``filesystem``.
    use_threads : bool, optional
        Forwarded to ``pq.read_table`` as ``use_threads``. The default is ``True``.

    Returns
    -------
    pyarrow.Table
        The table read from ``fpath`` without the partitioning columns.
    """
    # Define filters argument
    filters = get_time_filters(start_time=start_time, end_time=end_time, inclusive=inclusive)

    # Read the data
    # - https://arrow.apache.org/docs/python/generated/pyarrow.parquet.read_table.html#pyarrow-parquet-read-table
    # NOTE: pq.ParquetDataset(...).read(...) would be an alternative reading path
    # - https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetDataset.html
    table = pq.read_table(
        fpath,
        use_pandas_metadata=True,
        columns=columns,
        filters=filters,
        filesystem=filesystem,
        use_threads=use_threads,
    )

    # Remove partitioning columns
    if partitions is None:
        return table
    if isinstance(partitions, str):
        partitions = [partitions]
    # Dropping a column that is absent raises a KeyError in pyarrow. This happens
    # whenever `columns` restricts the read to a subset without the partitioning
    # columns, so only the partitioning columns actually present are dropped.
    available_columns = set(table.column_names)
    columns_to_drop = [name for name in partitions if name in available_columns]
    if columns_to_drop:
        table = table.drop(columns_to_drop)
    return table