feat: add tg-export-workspace / tg-import-workspace bundle commands (#877) (#1019)

Phase 1 (config only): export a workspace's full configuration as a
portable .tgx bundle (gzipped tar with manifest.json and one
pretty-printed, self-describing JSON file per config key under
config/<type>/), and import it into another deployment or workspace.

Import defaults to WorkspaceInit's re-run semantics (existing keys kept,
missing keys added; --overwrite replaces), supports --workspace rename,
--dry-run, and --config-only, and refuses to silently drop knowledge
data from future Phase-2 bundles it cannot import yet.
This commit is contained in:
Sunny Yang 2026-07-03 08:16:39 -06:00 committed by GitHub
parent c05296376e
commit 76c4763b9b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 561 additions and 0 deletions

View file

@ -116,6 +116,8 @@ tg-put-config-item = "trustgraph.cli.put_config_item:main"
tg-delete-config-item = "trustgraph.cli.delete_config_item:main"
tg-export-workspace-config = "trustgraph.cli.export_workspace_config:main"
tg-import-workspace-config = "trustgraph.cli.import_workspace_config:main"
tg-export-workspace = "trustgraph.cli.export_workspace:main"
tg-import-workspace = "trustgraph.cli.import_workspace:main"
tg-list-collections = "trustgraph.cli.list_collections:main"
tg-set-collection = "trustgraph.cli.set_collection:main"
tg-delete-collection = "trustgraph.cli.delete_collection:main"

View file

@ -0,0 +1,141 @@
"""
Exports a workspace's full configuration state as a portable .tgx bundle
(a gzipped tar archive) for backup, migration between deployments, or
sharing a pre-configured workspace.
The bundle is human-readable: a manifest.json plus one pretty-printed JSON
file per config key under config/<type>/, so it can be inspected and
hand-edited before import. Each entry file embeds its own type and key, so
filenames are cosmetic. Knowledge export (triples, documents, embeddings)
is not yet included; the manifest records that so future importers can
distinguish config-only bundles.
"""
import argparse
import io
import json
import os
import sys
import tarfile
import time
from urllib.parse import quote
from trustgraph.api import Api
# Connection defaults; each can be overridden through the environment.
default_url = os.environ.get("TRUSTGRAPH_URL", "http://localhost:8088/")
default_token = os.environ.get("TRUSTGRAPH_TOKEN")
default_workspace = os.environ.get("TRUSTGRAPH_WORKSPACE", "default")

# Format identifier and format version recorded in the bundle manifest.
MANIFEST_FORMAT = "tgx"
MANIFEST_FORMAT_VERSION = 1
def _add_bytes(tar, name, data):
    """Append an in-memory payload to an open tar archive as a regular file.

    ``tar`` is a writable ``tarfile.TarFile``, ``name`` is the member path
    inside the archive and ``data`` is the member content as bytes. The
    member's modification time is set to the current time.
    """
    member = tarfile.TarInfo(name)
    member.mtime = int(time.time())
    member.size = len(data)
    tar.addfile(member, fileobj=io.BytesIO(data))
def export_workspace(url, workspace, output, token=None):
    """Write the configuration of ``workspace`` to a .tgx bundle at ``output``.

    Fetches all config items through the config API at ``url`` (optionally
    authenticated with ``token``) and writes a gzipped tar containing
    ``manifest.json`` followed by one pretty-printed JSON file per
    (type, key) pair under ``config/<type>/``. Prints a one-line summary
    once the bundle has been written.
    """
    config_api = Api(url, token=token, workspace=workspace).config()
    config, version = config_api.all()

    exported_at = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
    manifest = {
        "format": MANIFEST_FORMAT,
        "format_version": MANIFEST_FORMAT_VERSION,
        "workspace": workspace,
        "config_version": version,
        "exported_at": exported_at,
        # Knowledge export is not implemented, so the manifest says so.
        "contents": {"config": True, "knowledge": False},
    }

    # Flatten the type -> {key: raw} mapping into one stream, ordered by
    # type and then by key.
    items = (
        (type_, key, raw)
        for type_, entries in sorted(config.items())
        for key, raw in sorted(entries.items())
    )

    written = 0
    with tarfile.open(output, "w:gz") as bundle:
        manifest_bytes = json.dumps(manifest, indent=2).encode("utf-8")
        _add_bytes(bundle, "manifest.json", manifest_bytes)

        for type_, key, raw in items:
            # Values that parse as JSON are stored parsed so the bundle is
            # pretty-printed and hand-editable; anything else is kept
            # exactly as received.
            try:
                value = json.loads(raw)
            except (TypeError, json.JSONDecodeError):
                value = raw

            # The entry records the real type and key, so the
            # percent-encoded member name only has to be path-safe.
            member_name = (
                f"config/{quote(type_, safe='')}/{quote(key, safe='')}.json"
            )
            entry_bytes = json.dumps(
                {"type": type_, "key": key, "value": value},
                indent=2,
            ).encode("utf-8")

            _add_bytes(bundle, member_name, entry_bytes)
            written += 1

    print(f"Exported {written} config item(s) from workspace "
          f"'{workspace}' to {output}", flush=True)
def main():
    """Command-line entry point for tg-export-workspace.

    Parses the command line and runs the export. On any exception the error
    is printed and the process exits with status 1.
    """
    parser = argparse.ArgumentParser(
        prog='tg-export-workspace',
        description=__doc__,
    )

    # (flags, keyword arguments) for each option, in help-output order.
    options = [
        (('-u', '--api-url'), {
            'default': default_url,
            'help': f'API URL (default: {default_url})',
        }),
        (('-t', '--token'), {
            'default': default_token,
            'help': 'API token (default: TRUSTGRAPH_TOKEN environment variable)',
        }),
        (('-w', '--workspace'), {
            'default': default_workspace,
            'help': f'Workspace to export (default: {default_workspace})',
        }),
        (('-o', '--output'), {
            'required': True,
            'help': 'Output bundle path, e.g. workspace-default.tgx',
        }),
    ]
    for flags, spec in options:
        parser.add_argument(*flags, **spec)

    args = parser.parse_args()

    try:
        export_workspace(
            url=args.api_url,
            workspace=args.workspace,
            output=args.output,
            token=args.token,
        )
    except Exception as err:
        print("Exception:", err, flush=True)
        sys.exit(1)


if __name__ == "__main__":
    main()

View file

@ -0,0 +1,194 @@
"""
Imports a workspace bundle (.tgx, produced by tg-export-workspace) into a
TrustGraph deployment. The target workspace defaults to the name recorded
in the bundle's manifest and can be renamed with --workspace.
By default existing (type, key) entries in the target workspace are left
untouched and only missing keys are added, matching WorkspaceInit's
re-run behaviour; pass --overwrite to replace every imported key. Use
--dry-run to show what would be written without changing anything.
"""
import argparse
import json
import os
import sys
import tarfile
from trustgraph.api import Api
from trustgraph.api.types import ConfigValue
# Connection defaults; each can be overridden through the environment.
default_url = os.environ.get("TRUSTGRAPH_URL", "http://localhost:8088/")
default_token = os.environ.get("TRUSTGRAPH_TOKEN")

# Bundle format identifier and the highest format version this importer
# accepts.
SUPPORTED_FORMAT = "tgx"
SUPPORTED_FORMAT_VERSION = 1
def _read_bundle(path):
    """Read and validate the manifest and config entries of a .tgx bundle.

    Only ``manifest.json`` and ``config/...json`` members are loaded; every
    other member is skipped without reading its content. A leading ``./``
    on member names (as written by ``tar -C dir .`` when a bundle is
    re-packed after hand-editing) is ignored.

    Args:
        path: Filesystem path of the gzipped tar bundle.

    Returns:
        A ``(manifest, entries)`` tuple: the parsed manifest dict and a list
        of entry dicts, each containing ``type``, ``key`` and ``value``.

    Raises:
        RuntimeError: if the manifest is missing or not a JSON object, the
            format or format version is unsupported, a relevant member is
            not valid JSON, or a config entry lacks ``type``, ``key`` or
            ``value``.
    """
    required_fields = {"type", "key", "value"}
    manifest = None
    entries = []

    with tarfile.open(path, "r:gz") as tar:
        # Iterate the archive instead of calling getmembers() so members are
        # visited in stream order rather than by seeking backwards through
        # the gzip stream.
        for member in tar:
            if not member.isfile():
                continue

            name = member.name
            while name.startswith("./"):
                name = name[2:]

            is_manifest = name == "manifest.json"
            is_entry = name.startswith("config/") and name.endswith(".json")
            if not (is_manifest or is_entry):
                # Not something this importer understands; do not load it.
                continue

            f = tar.extractfile(member)
            if f is None:
                continue

            try:
                parsed = json.loads(f.read())
            except ValueError as err:
                # Covers both JSONDecodeError and undecodable bytes.
                raise RuntimeError(
                    f"invalid JSON in bundle member {name!r}: {err}"
                ) from err

            if is_manifest:
                manifest = parsed
                continue

            if not isinstance(parsed, dict) or \
                    not required_fields <= parsed.keys():
                raise RuntimeError(
                    f"invalid config entry in bundle member {name!r}: "
                    "expected an object with 'type', 'key' and 'value'"
                )
            entries.append(parsed)

    if manifest is None:
        raise RuntimeError("not a workspace bundle: manifest.json missing")

    if not isinstance(manifest, dict):
        raise RuntimeError(
            "not a workspace bundle: manifest.json is not a JSON object"
        )

    if manifest.get("format") != SUPPORTED_FORMAT:
        raise RuntimeError(
            f"unsupported bundle format: {manifest.get('format')!r}"
        )

    format_version = manifest.get("format_version", 0)
    if not isinstance(format_version, (int, float)):
        raise RuntimeError(
            f"invalid bundle format version: {format_version!r}"
        )

    if format_version > SUPPORTED_FORMAT_VERSION:
        raise RuntimeError(
            f"bundle format version {format_version} is "
            f"newer than this tool supports ({SUPPORTED_FORMAT_VERSION}); "
            "upgrade trustgraph-cli"
        )

    return manifest, entries
def import_workspace(
        url, input, workspace=None, overwrite=False, config_only=False,
        dry_run=False, token=None,
):
    """Import the config entries of the bundle at ``input`` into a workspace.

    The target workspace is ``workspace`` if given, otherwise the name
    recorded in the bundle manifest, otherwise ``"default"``. Unless
    ``overwrite`` is set, keys already listed by the config API for the
    target workspace are skipped. With ``dry_run`` the selected items are
    only printed and nothing is written.

    Raises:
        RuntimeError: if the manifest marks the bundle as containing
            knowledge data and ``config_only`` is not set.
    """
    manifest, entries = _read_bundle(input)

    # Knowledge import is not implemented; refuse rather than silently drop
    # it from a bundle that carries it.
    has_knowledge = manifest.get("contents", {}).get("knowledge")
    if has_knowledge and not config_only:
        raise RuntimeError(
            "bundle contains knowledge data, which this tool cannot import "
            "yet; re-run with --config-only to import just the configuration"
        )

    target = workspace or manifest.get("workspace") or "default"
    api = Api(url, token=token, workspace=target).config()

    # put() is treated as an unconditional upsert here, so "keep existing
    # keys" is enforced client-side: list the keys present for each type in
    # the bundle and filter per key. With --overwrite nothing is listed and
    # therefore nothing is filtered.
    if overwrite:
        existing = {}
    else:
        bundle_types = sorted({entry["type"] for entry in entries})
        existing = {t: set(api.list(t)) for t in bundle_types}

    values = []
    skipped = 0
    for entry in entries:
        type_, key, value = entry["type"], entry["key"], entry["value"]

        if key in existing.get(type_, ()):
            skipped += 1
            continue

        # Values are sent as JSON-encoded strings.
        encoded = json.dumps(value)
        values.append(ConfigValue(type=type_, key=key, value=encoded))

    if dry_run:
        for item in values:
            print(f"would import {item.type}/{item.key}", flush=True)
        print(f"Dry run: {len(values)} item(s) would be imported into "
              f"workspace '{target}', {skipped} skipped as existing",
              flush=True)
        return

    if values:
        api.put(values)

    print(f"Imported {len(values)} config item(s) into workspace "
          f"'{target}', {skipped} skipped as existing", flush=True)
def main():
    """Command-line entry point for tg-import-workspace.

    Parses the command line and runs the import. On any exception the error
    is printed and the process exits with status 1.
    """
    parser = argparse.ArgumentParser(
        prog='tg-import-workspace',
        description=__doc__,
    )

    # (flags, keyword arguments) for each option, in help-output order.
    options = [
        (('-u', '--api-url'), {
            'default': default_url,
            'help': f'API URL (default: {default_url})',
        }),
        (('-t', '--token'), {
            'default': default_token,
            'help': 'API token (default: TRUSTGRAPH_TOKEN environment variable)',
        }),
        (('-i', '--input'), {
            'required': True,
            'help': 'Input bundle path, e.g. workspace-default.tgx',
        }),
        (('-w', '--workspace'), {
            'default': None,
            'help': 'Target workspace (default: the workspace recorded in the '
                    'bundle manifest)',
        }),
        (('--overwrite',), {
            'action': 'store_true',
            'help': 'Replace existing keys in the target workspace (default: '
                    'keep existing keys and only add missing ones)',
        }),
        (('--config-only',), {
            'action': 'store_true',
            'help': 'Import only the configuration, skipping any knowledge data '
                    'in the bundle',
        }),
        (('--dry-run',), {
            'action': 'store_true',
            'help': 'Show what would be imported without writing anything',
        }),
    ]
    for flags, spec in options:
        parser.add_argument(*flags, **spec)

    args = parser.parse_args()

    try:
        import_workspace(
            url=args.api_url,
            input=args.input,
            workspace=args.workspace,
            overwrite=args.overwrite,
            config_only=args.config_only,
            dry_run=args.dry_run,
            token=args.token,
        )
    except Exception as err:
        print("Exception:", err, flush=True)
        sys.exit(1)


if __name__ == "__main__":
    main()