trustgraph/trustgraph-parquet/scripts/dump-parquet

25 lines
416 B
Text
Raw Normal View History

2024-07-23 22:53:54 +01:00
#!/usr/bin/env python3
2024-07-25 21:43:55 +01:00
import pyarrow as pa
import pyarrow.csv as pc
2024-07-23 22:53:54 +01:00
import pyarrow.parquet as pq
2024-07-25 21:43:55 +01:00
import pandas as pd
2024-07-23 22:53:54 +01:00
import sys
2024-07-25 21:43:55 +01:00
# Load every Parquet file named on the command line into its own DataFrame.
# The frames are gathered first and combined in one step below: calling
# pd.concat() inside the loop would re-copy all previously loaded rows on
# every iteration.
frames = [pq.read_table(path).to_pandas() for path in sys.argv[1:]]

if len(frames) == 1:
    # A single input is passed through untouched (no concat), so whatever
    # index to_pandas() produced is kept rather than being renumbered.
    df = frames[0]
elif frames:
    # Several inputs: stack their rows and renumber them from zero.
    df = pd.concat(frames, ignore_index=True)
else:
    # No files were given, so there is nothing to dump.
    df = None

if df is not None:
    # Convert the combined frame back to an Arrow table and write it as CSV
    # to the binary layer of stdout.
    table = pa.Table.from_pandas(df)
    pc.write_csv(table, sys.stdout.buffer)
2024-07-23 22:53:54 +01:00