trustgraph/dev-tools/library_client.py

#!/usr/bin/env python3

"""
Client utility for browsing and loading documents from the TrustGraph
public document library.

Usage:
    python library_client.py list
    python library_client.py search <text>
    python library_client.py load-all
    python library_client.py load-doc <id>
    python library_client.py load-match <text>
"""

import json
import urllib.request
import sys
import os
import argparse

from trustgraph.api import Api
from trustgraph.api.types import Uri, Literal, Triple

# Base URL of the public storage bucket; the index, metadata and content
# URLs used in this file are all built from it.
BUCKET_URL = "https://storage.googleapis.com/trustgraph-library"
# Location of the JSON index downloaded by fetch_index().
INDEX_URL = f"{BUCKET_URL}/index.json"

# Defaults for the command-line options.  The API URL and token may be
# supplied through the TRUSTGRAPH_URL / TRUSTGRAPH_TOKEN environment variables.
default_url = os.getenv("TRUSTGRAPH_URL", "http://localhost:8088/")
default_user = "trustgraph"
default_token = os.getenv("TRUSTGRAPH_TOKEN", None)


def fetch_index():
    """Download the library index from INDEX_URL and return it parsed from JSON.

    Any error raised by the HTTP request or by JSON decoding propagates to
    the caller.
    """
    with urllib.request.urlopen(INDEX_URL) as response:
        raw = response.read()
    return json.loads(raw)


def fetch_document_metadata(doc_id):
    """Download ``<BUCKET_URL>/<doc_id>.json`` and return it parsed from JSON.

    Args:
        doc_id: Identifier of the document; interpolated into the URL as-is.

    Returns:
        The decoded JSON payload.
    """
    with urllib.request.urlopen(f"{BUCKET_URL}/{doc_id}.json") as response:
        raw = response.read()
    return json.loads(raw)


def fetch_document_content(doc_id):
    """Download ``<BUCKET_URL>/<doc_id>.epub`` and return the raw response body.

    Args:
        doc_id: Identifier of the document; interpolated into the URL as-is.

    Returns:
        The complete body read from the HTTP response.
    """
    with urllib.request.urlopen(f"{BUCKET_URL}/{doc_id}.epub") as response:
        body = response.read()
    return body


def search_index(index, query):
    """Return the index entries that contain ``query``, ignoring case.

    An entry matches when the query is a substring of its title, of its
    comments, or of any one of its tags.  Fields that are missing or
    explicitly null are treated as empty, so a sparse entry cannot abort
    the whole search.

    Args:
        index: Iterable of dict entries, as parsed from the index JSON.
        query: Text to look for.

    Returns:
        A list of the matching entries, in their original order.
    """
    needle = query.lower()

    def matches(doc):
        # `or` rather than a .get() default: the default only applies when
        # the key is absent, not when it is present with a null value.
        haystacks = [doc.get("title") or "", doc.get("comments") or ""]
        haystacks.extend(doc.get("tags") or [])
        return any(needle in text.lower() for text in haystacks)

    return [doc for doc in index if matches(doc)]


def print_index(index):
    """Print index entries to stdout as an aligned ID / Title / Tags table.

    Nothing is printed for an empty index.  Titles are limited to 60
    characters and tags to the remaining terminal width (at least 20);
    longer values are cut and end in "...".  Missing or null title/tags
    fields are shown as empty.

    Args:
        index: Sequence of dict entries, as parsed from the index JSON.
    """
    if not index:
        return

    # Normalise every entry once.  `or` also covers keys that are present
    # with a null value, which a .get() default would let through as None.
    rows = [
        (
            str(doc.get("id", "")),
            doc.get("title") or "",
            ", ".join(doc.get("tags") or []),
        )
        for doc in index
    ]

    # A column is never narrower than its header label, otherwise the header
    # and the rows drift out of alignment.  Titles are capped for readability.
    id_width = max(len("ID"), max(len(eid) for eid, _, _ in rows))
    longest_title = max(len(title) for _, title, _ in rows)
    title_width = max(len("Title"), min(longest_title, 60))

    try:
        term_width = os.get_terminal_size().columns
    except OSError:
        # Terminal size could not be queried; fall back to a fixed width.
        term_width = 120

    # Reserve 6 columns for the gaps between columns plus a small margin.
    tags_width = max(term_width - id_width - title_width - 6, 20)

    def clip(text, width):
        """Shorten text to width characters, marking the cut with an ellipsis."""
        if len(text) > width:
            return text[:width - 3] + "..."
        return text

    header = f"{'ID':<{id_width}}  {'Title':<{title_width}}  {'Tags':<{tags_width}}"
    print(header)
    print("-" * len(header))

    for eid, title, tags in rows:
        title = clip(title, title_width)
        tags = clip(tags, tags_width)
        print(f"{eid:<{id_width}}  {title:<{title_width}}  {tags}")


def convert_value(v):
    """Wrap a JSON triple value in ``Uri`` or ``Literal``.

    Args:
        v: Mapping with a "type" and a "value" entry.

    Returns:
        ``Uri(v["value"])`` when the type is "uri", otherwise
        ``Literal(v["value"])``.
    """
    wrapper = Uri if v["type"] == "uri" else Literal
    return wrapper(v["value"])


def convert_metadata(metadata_json):
    """Build a list of ``Triple`` objects from JSON metadata triples.

    Args:
        metadata_json: Iterable of mappings, each with "s", "p" and "o"
            entries that are passed through ``convert_value``.

    Returns:
        One ``Triple`` per input item, in input order.
    """
    return [
        Triple(
            s=convert_value(item["s"]),
            p=convert_value(item["p"]),
            o=convert_value(item["o"]),
        )
        for item in metadata_json
    ]


def load_document(api, user, doc_entry):
    """Download one library document and add it to TrustGraph.

    Progress is reported on stdout.  The metadata file is fetched first and
    its first element is used as the document record; the content is then
    fetched and both are handed to ``api.add_document``.

    Args:
        api: Object exposing ``add_document`` (in this file, the result of
            ``Api(...).library()``).
        user: User ID passed through to ``add_document``.
        doc_entry: Index entry; its "id" and "title" entries are read.
    """
    entry_id, entry_title = doc_entry["id"], doc_entry["title"]
    print(f"  [{entry_id}] {entry_title}")

    print("    fetching metadata...")
    # The metadata payload is indexed as a sequence; only element 0 is used.
    record = fetch_document_metadata(entry_id)[0]

    print("    fetching content...")
    payload = fetch_document_content(entry_id)

    size_kb = len(payload) // 1024
    print(f"    loading into TrustGraph ({size_kb}KB)...")
    triples = convert_metadata(record["metadata"])

    api.add_document(
        id=record["id"],
        metadata=triples,
        user=user,
        kind=record["kind"],
        title=record["title"],
        comments=record["comments"],
        tags=record["tags"],
        document=payload,
    )

    print("    done.")


def load_documents(api, user, docs):
    """Load each index entry in ``docs``, continuing past individual failures.

    A failure while loading one document is reported on stderr and does not
    stop the remaining documents from being attempted.

    Args:
        api: Library API object forwarded to ``load_document``.
        user: User ID forwarded to ``load_document``.
        docs: Sized collection of index entries.
    """
    total = len(docs)
    print(f"Loading {total} document(s)...\n")

    for entry in docs:
        try:
            load_document(api, user, entry)
        except Exception as err:
            # Best effort: report the failure and move on to the next entry.
            print(f"    FAILED: {err}", file=sys.stderr)
        # Blank line separating the output of consecutive documents.
        print()

    print("Complete.")


def main():
    """Parse the command line and run the selected sub-command.

    ``list`` and ``search`` only print from the downloaded index.  The
    ``load-*`` commands additionally create a library API client and load
    the selected documents.  Exits with status 1 when no sub-command is
    given or when a search / ID lookup selects nothing.
    """
    parser = argparse.ArgumentParser(
        description="Browse and load documents from the TrustGraph public document library.",
    )

    # Global options.
    parser.add_argument(
        "-u", "--url",
        default=default_url,
        help=f"TrustGraph API URL (default: {default_url})",
    )
    parser.add_argument(
        "-U", "--user",
        default=default_user,
        help=f"User ID (default: {default_user})",
    )
    parser.add_argument(
        "-t", "--token",
        default=default_token,
        help="Authentication token (default: $TRUSTGRAPH_TOKEN)",
    )

    # Sub-commands.
    commands = parser.add_subparsers(dest="command")

    commands.add_parser("list", help="List all documents")

    search_cmd = commands.add_parser("search", help="Search documents")
    search_cmd.add_argument("query", help="Text to search for")

    commands.add_parser("load-all", help="Load all documents into TrustGraph")

    load_doc_cmd = commands.add_parser("load-doc", help="Load a document by ID")
    load_doc_cmd.add_argument("id", help="Document ID (ebook number)")

    load_match_cmd = commands.add_parser(
        "load-match", help="Load all documents matching a search term",
    )
    load_match_cmd.add_argument("query", help="Text to search for")

    args = parser.parse_args()
    command = args.command

    if command is None:
        parser.print_help()
        sys.exit(1)

    index = fetch_index()

    # Read-only commands: handled before any API client is created.
    if command == "list":
        print_index(index)
        return

    if command == "search":
        found = search_index(index, args.query)
        if not found:
            print("No matches found.", file=sys.stderr)
            sys.exit(1)
        print_index(found)
        return

    # Load commands need the API
    library = Api(args.url, token=args.token).library()

    if command == "load-all":
        selected = index

    elif command == "load-doc":
        # IDs are compared as strings because the argument arrives as text.
        selected = [d for d in index if str(d.get("id")) == args.id]
        if not selected:
            print(f"No document with ID '{args.id}' found.", file=sys.stderr)
            sys.exit(1)

    elif command == "load-match":
        selected = search_index(index, args.query)
        if not selected:
            print("No matches found.", file=sys.stderr)
            sys.exit(1)

    else:
        return

    load_documents(library, args.user, selected)

# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Config push notify pattern: replace stateful pub/sub with signal + fetch (#760). Replace the config push mechanism that broadcast the full config blob on a 'state' class pub/sub queue with a lightweight notify signal containing only the version number and affected config types. Processors fetch the full config via request/response from the config service when notified. This eliminates the need for the pub/sub 'state' queue class and stateful pub/sub services entirely. The config push queue moves from 'state' to 'flow' class — a simple transient signal rather than a retained message. This solves the RabbitMQ late-subscriber problem where restarting processes never received the current config because their fresh queue had no historical messages. Key changes: - ConfigPush schema: config dict replaced with types list - Subscribe-then-fetch startup with retry: processors subscribe to notify queue, fetch config via request/response, then process buffered notifies with version comparison to avoid race conditions - register_config_handler() accepts optional types parameter so handlers only fire when their config types change - Short-lived config request/response clients to avoid subscriber contention on non-persistent response topics - Config service passes affected types through put/delete/flow operations - Gateway ConfigReceiver rewritten with same notify pattern and retry loop. Tests updated. New tests: - register_config_handler: without types, with types, multiple types, multiple handlers - on_config_notify: old/same version skipped, irrelevant types skipped (version still updated), relevant type triggers fetch, handler without types always called, mixed handler filtering, empty types invokes all, fetch failure handled gracefully - fetch_config: returns config+version, raises on error response, stops client even on exception - fetch_and_apply_config: applies to all handlers on startup, retries on failure 2026-04-06 16:57:27 +01:00			`#!/usr/bin/env python3`

			`"""`
			`Client utility for browsing and loading documents from the TrustGraph`
			`public document library.`

			`Usage:`
			`python library_client.py list`
			`python library_client.py search <text>`
			`python library_client.py load-all`
			`python library_client.py load-doc <id>`
			`python library_client.py load-match <text>`
			`"""`

			`import json`
			`import urllib.request`
			`import sys`
			`import os`
			`import argparse`

			`from trustgraph.api import Api`
			`from trustgraph.api.types import Uri, Literal, Triple`

			`BUCKET_URL = "https://storage.googleapis.com/trustgraph-library"`
			`INDEX_URL = f"{BUCKET_URL}/index.json"`

			`default_url = os.getenv("TRUSTGRAPH_URL", "http://localhost:8088/")`
			`default_user = "trustgraph"`
			`default_token = os.getenv("TRUSTGRAPH_TOKEN", None)`


			`def fetch_index():`
			`with urllib.request.urlopen(INDEX_URL) as resp:`
			`return json.loads(resp.read())`


			`def fetch_document_metadata(doc_id):`
			`url = f"{BUCKET_URL}/{doc_id}.json"`
			`with urllib.request.urlopen(url) as resp:`
			`return json.loads(resp.read())`


			`def fetch_document_content(doc_id):`
			`url = f"{BUCKET_URL}/{doc_id}.epub"`
			`with urllib.request.urlopen(url) as resp:`
			`return resp.read()`


			`def search_index(index, query):`
			`query = query.lower()`
			`results = []`
			`for doc in index:`
			`title = doc.get("title", "").lower()`
			`comments = doc.get("comments", "").lower()`
			`tags = [t.lower() for t in doc.get("tags", [])]`
			`if (query in title or query in comments or`
			`any(query in t for t in tags)):`
			`results.append(doc)`
			`return results`


			`def print_index(index):`
			`if not index:`
			`return`

			`# Calculate column widths`
			`id_width = max(len(str(doc.get("id", ""))) for doc in index)`
			`title_width = max(len(doc.get("title", "")) for doc in index)`

			`# Cap title width for readability`
			`title_width = min(title_width, 60)`
			`id_width = max(id_width, 2)`

			`try:`
			`term_width = os.get_terminal_size().columns`
			`except OSError:`
			`term_width = 120`

			`tags_width = max(term_width - id_width - title_width - 6, 20)`

			`header = f"{'ID':<{id_width}} {'Title':<{title_width}} {'Tags':<{tags_width}}"`
			`print(header)`
			`print("-" * len(header))`

			`for doc in index:`
			`eid = str(doc.get("id", ""))`
			`title = doc.get("title", "")`
			`if len(title) > title_width:`
			`title = title[:title_width - 3] + "..."`
			`tags = ", ".join(doc.get("tags", []))`
			`if len(tags) > tags_width:`
			`tags = tags[:tags_width - 3] + "..."`
			`print(f"{eid:<{id_width}} {title:<{title_width}} {tags}")`


			`def convert_value(v):`
			`"""Convert a JSON triple value to a Uri or Literal."""`
			`if v["type"] == "uri":`
			`return Uri(v["value"])`
			`else:`
			`return Literal(v["value"])`


			`def convert_metadata(metadata_json):`
			`"""Convert JSON metadata triples to Triple objects."""`
			`triples = []`
			`for t in metadata_json:`
			`triples.append(Triple(`
			`s=convert_value(t["s"]),`
			`p=convert_value(t["p"]),`
			`o=convert_value(t["o"]),`
			`))`
			`return triples`


			`def load_document(api, user, doc_entry):`
			`"""Fetch metadata and content for a document, then load into TrustGraph."""`
			`doc_id = doc_entry["id"]`
			`title = doc_entry["title"]`

			`print(f" [{doc_id}] {title}")`

			`print(f" fetching metadata...")`
			`doc_json = fetch_document_metadata(doc_id)`
			`doc = doc_json[0]`

			`print(f" fetching content...")`
			`content = fetch_document_content(doc_id)`

			`print(f" loading into TrustGraph ({len(content) // 1024}KB)...")`
			`metadata = convert_metadata(doc["metadata"])`

			`api.add_document(`
			`id=doc["id"],`
			`metadata=metadata,`
			`user=user,`
			`kind=doc["kind"],`
			`title=doc["title"],`
			`comments=doc["comments"],`
			`tags=doc["tags"],`
			`document=content,`
			`)`

			`print(f" done.")`


			`def load_documents(api, user, docs):`
			`"""Load a list of documents."""`
			`print(f"Loading {len(docs)} document(s)...\n")`
			`for doc in docs:`
			`try:`
			`load_document(api, user, doc)`
			`except Exception as e:`
			`print(f" FAILED: {e}", file=sys.stderr)`
			`print()`
			`print("Complete.")`


			`def main():`
			`parser = argparse.ArgumentParser(`
			`description="Browse and load documents from the TrustGraph public document library.",`
			`)`

			`parser.add_argument(`
			`"-u", "--url", default=default_url,`
			`help=f"TrustGraph API URL (default: {default_url})",`
			`)`
			`parser.add_argument(`
			`"-U", "--user", default=default_user,`
			`help=f"User ID (default: {default_user})",`
			`)`
			`parser.add_argument(`
			`"-t", "--token", default=default_token,`
			`help="Authentication token (default: $TRUSTGRAPH_TOKEN)",`
			`)`

			`sub = parser.add_subparsers(dest="command")`

			`sub.add_parser("list", help="List all documents")`

			`search_parser = sub.add_parser("search", help="Search documents")`
			`search_parser.add_argument("query", help="Text to search for")`

			`sub.add_parser("load-all", help="Load all documents into TrustGraph")`

			`load_doc_parser = sub.add_parser("load-doc", help="Load a document by ID")`
			`load_doc_parser.add_argument("id", help="Document ID (ebook number)")`

			`load_match_parser = sub.add_parser(`
			`"load-match", help="Load all documents matching a search term",`
			`)`
			`load_match_parser.add_argument("query", help="Text to search for")`

			`args = parser.parse_args()`

			`if args.command is None:`
			`parser.print_help()`
			`sys.exit(1)`

			`index = fetch_index()`

			`if args.command in ("list", "search"):`
			`if args.command == "list":`
			`print_index(index)`
			`else:`
			`results = search_index(index, args.query)`
			`if results:`
			`print_index(results)`
			`else:`
			`print("No matches found.", file=sys.stderr)`
			`sys.exit(1)`
			`return`

			`# Load commands need the API`
			`api = Api(args.url, token=args.token).library()`

			`if args.command == "load-all":`
			`load_documents(api, args.user, index)`

			`elif args.command == "load-doc":`
			`matches = [d for d in index if str(d.get("id")) == args.id]`
			`if not matches:`
			`print(f"No document with ID '{args.id}' found.", file=sys.stderr)`
			`sys.exit(1)`
			`load_documents(api, args.user, matches)`

			`elif args.command == "load-match":`
			`results = search_index(index, args.query)`
			`if results:`
			`load_documents(api, args.user, results)`
			`else:`
			`print("No matches found.", file=sys.stderr)`
			`sys.exit(1)`


			`if __name__ == "__main__":`
			`main()`