mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-04-28 09:56:22 +02:00
Feature/subpackages (#80)
* Renaming what will become the core package
* Tweaking to get package build working
* Fix metering merge
* Rename to core directory
* Bump version. Use namespace searching for packaging trustgraph-core
* Change references to trustgraph-core
* Forming embeddings-hf package
* Reference modules in core package.
* Build both packages to one container, bump version
* Update YAMLs
This commit is contained in:
parent
14d79ef9f1
commit
f081933217
303 changed files with 681 additions and 624 deletions
24
trustgraph-core/scripts/dump-parquet
Executable file
24
trustgraph-core/scripts/dump-parquet
Executable file
|
|
@ -0,0 +1,24 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import pyarrow as pa
|
||||
import pyarrow.csv as pc
|
||||
import pyarrow.parquet as pq
|
||||
import pandas as pd
|
||||
import sys
|
||||
|
||||
def main(paths):
    """Dump the rows of one or more Parquet files to stdout as CSV.

    Each path in *paths* is read with pyarrow and converted to a pandas
    DataFrame. With several inputs the frames are concatenated into one
    (with a fresh index) before being written out as a single CSV
    stream. With no inputs nothing is written.

    Args:
        paths: Parquet file paths, in the order their rows should appear
            in the output.
    """
    # Load every input up front so the frames can be concatenated in a
    # single call; concatenating inside the loop would re-copy all
    # previously accumulated rows for every additional file.
    parts = [pq.read_table(path).to_pandas() for path in paths]

    if not parts:
        return

    if len(parts) == 1:
        # A lone input is passed through untouched (no concat), so its
        # index is not reset.
        df = parts[0]
    else:
        df = pd.concat(parts, ignore_index=True)

    table = pa.Table.from_pandas(df)

    # write_csv emits bytes, hence the binary buffer behind stdout.
    pc.write_csv(table, sys.stdout.buffer)


if __name__ == "__main__":
    main(sys.argv[1:])
|
||||
|
||||
Loading…
Add table
Add a link
Reference in a new issue