2024-07-23 22:53:54 +01:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
|
2024-07-25 21:43:55 +01:00
|
|
|
import pyarrow as pa
|
|
|
|
|
import pyarrow.csv as pc
|
2024-07-23 22:53:54 +01:00
|
|
|
import pyarrow.parquet as pq
|
2024-07-25 21:43:55 +01:00
|
|
|
import pandas as pd
|
2024-07-23 22:53:54 +01:00
|
|
|
import sys
|
|
|
|
|
|
2024-07-25 21:43:55 +01:00
|
|
|
# Merge every Parquet file named on the command line and emit the combined
# rows as CSV on standard output.
#
# Each file is read with pyarrow and converted to a pandas DataFrame. The
# frames are gathered first and concatenated in a single call: concatenating
# inside the loop would re-copy the accumulated frame once per input file.
parts = [pq.read_table(file).to_pandas() for file in sys.argv[1:]]

if not parts:
    # No input files were given: nothing is written.
    df = None
elif len(parts) == 1:
    # A lone frame is used as-is (it is not re-indexed).
    df = parts[0]
else:
    # Several frames: stack them and renumber the rows from zero.
    df = pd.concat(parts, ignore_index=True)

if df is not None:
    table = pa.Table.from_pandas(df)
    # write_csv needs a binary sink, hence the underlying stdout buffer.
    pc.write_csv(table, sys.stdout.buffer)
|
2024-07-23 22:53:54 +01:00
|
|
|
|