#!/usr/bin/env python3
"""
Concatenates multiple parquet files into a single parquet output
"""

import argparse
import sys

import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq


def _parse_args(argv=None):
    """Parse the command line and return the populated namespace.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Returns:
        Namespace with ``input`` (list of paths, possibly empty) and
        ``output`` (path or ``None``).

    Exits with an argparse usage error when input files are given but no
    output path is.
    """
    parser = argparse.ArgumentParser(
        prog="combine-parquet",
        description=__doc__,
    )
    parser.add_argument(
        '-i', '--input',
        nargs='*',
        # Without an explicit default, omitting -i leaves ``input`` as None
        # and iterating over it raises TypeError.
        default=[],
        help='Input files',
    )
    parser.add_argument(
        '-o', '--output',
        help='Output files',
    )
    args = parser.parse_args(argv)

    # Fail before reading anything rather than inside write_table() after all
    # inputs were loaded. Only enforced when there is something to write, so
    # an invocation without inputs remains a harmless no-op.
    if args.input and args.output is None:
        parser.error('the following arguments are required: -o/--output')
    return args


def _combine(frames):
    """Return one DataFrame holding the rows of all ``frames`` in order.

    A single frame is returned untouched (its index is kept); two or more
    frames are concatenated in one call with a freshly numbered index.
    """
    if len(frames) == 1:
        return frames[0]
    # One concat over the whole list instead of growing a frame inside the
    # loop, which re-copies the accumulated rows for every additional file.
    return pd.concat(frames, ignore_index=True)


def main(argv=None):
    """Read every input parquet file and write the combined rows to the output.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Returns:
        Process exit status (always 0; argument errors exit via argparse).
    """
    args = _parse_args(argv)

    if not args.input:
        # Nothing to combine: no output file is produced.
        print(
            'combine-parquet: no input files given, nothing written',
            file=sys.stderr,
        )
        return 0

    frames = [pq.read_table(path).to_pandas() for path in args.input]
    table = pa.Table.from_pandas(_combine(frames))
    pq.write_table(table, args.output)
    return 0


if __name__ == "__main__":
    sys.exit(main())