Source code for tstore.archive.ts.readers.pyarrow

#!/usr/bin/env python3
"""
Created on Mon Jun 12 22:19:51 2023.

@author: ghiggi
"""

import pyarrow.parquet as pq

from tstore.archive.ts.utility import get_time_filters


def open_ts(
    fpath,
    partitions,
    start_time=None,
    end_time=None,
    inclusive=None,
    columns=None,
    split_row_groups=False,  # noqa: ARG001
    # pyarrow-specific
    filesystem=None,
    use_threads=True,
):
    """Open a TS into a pyarrow.Table.

    Parameters
    ----------
    fpath
        Source handed to ``pyarrow.parquet.read_table``.
    partitions
        Name(s) of the partitioning columns to remove from the returned
        table. ``None`` means nothing is removed; a single string is treated
        as one column name. Names that are not present in the table that was
        read (e.g. because ``columns`` did not select them) are skipped.
    start_time, end_time, inclusive
        Forwarded to ``get_time_filters`` to build the ``filters`` argument
        of ``read_table``.
    columns
        Forwarded to ``read_table`` as the column selection.
    split_row_groups
        Unused by this reader; it is only relevant to the alternative
        ``pyarrow.parquet.ParquetDataset`` code path and is kept in the
        signature so existing callers keep working.
    filesystem
        Forwarded to ``read_table``.
    use_threads
        Forwarded to ``read_table``.

    Returns
    -------
    pyarrow.Table
        The table read from ``fpath`` without the partitioning columns.
    """
    # Define filters argument
    filters = get_time_filters(start_time=start_time, end_time=end_time, inclusive=inclusive)

    # Read with pq.read_table
    # - https://arrow.apache.org/docs/python/generated/pyarrow.parquet.read_table.html#pyarrow-parquet-read-table
    # NOTE: pq.ParquetDataset(...).read(...) is an alternative reader
    # - https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetDataset.html
    table = pq.read_table(
        fpath,
        use_pandas_metadata=True,
        columns=columns,
        filters=filters,
        filesystem=filesystem,
        use_threads=use_threads,
    )

    # Normalise the partitions argument to a list of column names
    if partitions is None:
        partition_names = []
    elif isinstance(partitions, str):
        partition_names = [partitions]
    else:
        partition_names = list(partitions)

    # Remove partitioning columns.
    # Only drop those actually present: dropping a missing column raises
    # KeyError, which happens when ``columns`` excludes the partition columns.
    available = set(table.column_names)
    to_drop = [name for name in partition_names if name in available]
    if to_drop:
        table = table.drop(to_drop)
    return table