mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-04-26 00:46:22 +02:00
Schema structure refactor (#451)
* Write schema refactor spec * Implemented schema refactor spec
This commit is contained in:
parent
f4733021c5
commit
5de56c5dbc
27 changed files with 370 additions and 223 deletions
35
trustgraph-base/trustgraph/schema/README.flows
Normal file
35
trustgraph-base/trustgraph/schema/README.flows
Normal file
|
|
@ -0,0 +1,35 @@
|
|||
|
||||
    pdf-
    decoder

    |
    v

    chunker

    |
    ,------------------+----------- . . .
    |                  |
    v                  v

    extract-           extract-
    relationships      definitions

    |                |     |
    +----------------'     |
    |                      v
    v
                       vectorize
    triple-
    store                  |
                           v

                       ge-write

Refactor:

[] Change vectorize
[] Re-route chunker to extract-*
[] Re-route vectorize to ge-write*
[] Re-route extract-definitions to ge-write*
[] Remove extract-relationships to ge-write routing
|
||||
|
|
@ -1,17 +1,10 @@
|
|||
|
||||
from . types import *
|
||||
from . prompt import *
|
||||
from . documents import *
|
||||
from . models import *
|
||||
from . object import *
|
||||
from . topic import *
|
||||
from . graph import *
|
||||
from . retrieval import *
|
||||
from . metadata import *
|
||||
from . agent import *
|
||||
from . lookup import *
|
||||
from . library import *
|
||||
from . config import *
|
||||
from . flows import *
|
||||
from . knowledge import *
|
||||
# Import core types and primitives
|
||||
from .core import *
|
||||
|
||||
# Import knowledge schemas
|
||||
from .knowledge import *
|
||||
|
||||
# Import service schemas
|
||||
from .services import *
|
||||
|
||||
|
|
|
|||
3
trustgraph-base/trustgraph/schema/core/__init__.py
Normal file
3
trustgraph-base/trustgraph/schema/core/__init__.py
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
from .primitives import *
|
||||
from .metadata import *
|
||||
from .topic import *
|
||||
|
|
@ -1,6 +1,6 @@
|
|||
|
||||
from pulsar.schema import Record, String, Array
|
||||
from . types import Triple
|
||||
from .primitives import Triple
|
||||
|
||||
class Metadata(Record):
|
||||
|
||||
|
|
@ -1,56 +0,0 @@
|
|||
|
||||
from pulsar.schema import Record, Bytes, String, Boolean, Integer, Array, Double
|
||||
from . topic import topic
|
||||
from . types import Error
|
||||
from . metadata import Metadata
|
||||
|
||||
############################################################################
|
||||
|
||||
# PDF docs etc.
|
||||
class Document(Record):
    # Metadata record describing the document
    metadata = Metadata()
    # Document content, carried as bytes
    data = Bytes()
|
||||
|
||||
############################################################################
|
||||
|
||||
# Text documents / text from PDF
|
||||
|
||||
class TextDocument(Record):
    # Metadata record describing the document
    metadata = Metadata()
    # Document text; note this is a bytes field, not a string field
    text = Bytes()
|
||||
|
||||
############################################################################
|
||||
|
||||
# Chunks of text
|
||||
|
||||
class Chunk(Record):
    # Metadata record travelling with the chunk
    metadata = Metadata()
    # Chunk content, carried as bytes
    chunk = Bytes()
|
||||
|
||||
############################################################################
|
||||
|
||||
# Document embeddings are embeddings associated with a chunk
|
||||
|
||||
class ChunkEmbeddings(Record):
    # The chunk the embeddings belong to, as bytes
    chunk = Bytes()
    # Embedding vectors: an array of arrays of doubles
    vectors = Array(Array(Double()))
|
||||
|
||||
# This is a 'batching' mechanism for the above data
|
||||
class DocumentEmbeddings(Record):
    # Metadata record shared by the whole batch
    metadata = Metadata()
    # Batch of per-chunk embedding records
    chunks = Array(ChunkEmbeddings())
|
||||
|
||||
############################################################################
|
||||
|
||||
# Doc embeddings query
|
||||
|
||||
class DocumentEmbeddingsRequest(Record):
    # Query vectors: an array of arrays of doubles
    vectors = Array(Array(Double()))
    # Integer limit -- presumably the maximum number of results; TODO confirm
    limit = Integer()
    # User and collection names -- presumably scope the query; TODO confirm
    user = String()
    collection = String()
|
||||
|
||||
class DocumentEmbeddingsResponse(Record):
    # Error record for the request
    error = Error()
    # Returned documents, each carried as bytes
    documents = Array(Bytes())
|
||||
|
||||
|
|
@ -1,71 +0,0 @@
|
|||
|
||||
from pulsar.schema import Record, Bytes, String, Boolean, Integer, Array, Double
|
||||
|
||||
from . types import Error, Value, Triple
|
||||
from . topic import topic
|
||||
from . metadata import Metadata
|
||||
|
||||
############################################################################
|
||||
|
||||
# An entity context is an entity associated with textual context
|
||||
|
||||
class EntityContext(Record):
    # The entity, as a Value record
    entity = Value()
    # Textual context associated with the entity
    context = String()
|
||||
|
||||
# This is a 'batching' mechanism for the above data
|
||||
class EntityContexts(Record):
    # Metadata record shared by the whole batch
    metadata = Metadata()
    # Batch of entity/context pairs
    entities = Array(EntityContext())
|
||||
|
||||
############################################################################
|
||||
|
||||
# Graph embeddings are embeddings associated with a graph entity
|
||||
|
||||
class EntityEmbeddings(Record):
    # The graph entity, as a Value record
    entity = Value()
    # Embedding vectors: an array of arrays of doubles
    vectors = Array(Array(Double()))
|
||||
|
||||
# This is a 'batching' mechanism for the above data
|
||||
class GraphEmbeddings(Record):
    # Metadata record shared by the whole batch
    metadata = Metadata()
    # Batch of per-entity embedding records
    entities = Array(EntityEmbeddings())
|
||||
|
||||
############################################################################
|
||||
|
||||
# Graph embeddings query
|
||||
|
||||
class GraphEmbeddingsRequest(Record):
    # Query vectors: an array of arrays of doubles
    vectors = Array(Array(Double()))
    # Integer limit -- presumably the maximum number of results; TODO confirm
    limit = Integer()
    # User and collection names -- presumably scope the query; TODO confirm
    user = String()
    collection = String()
|
||||
|
||||
class GraphEmbeddingsResponse(Record):
    # Error record for the request
    error = Error()
    # Returned entities, each a Value record
    entities = Array(Value())
|
||||
|
||||
############################################################################
|
||||
|
||||
# Graph triples
|
||||
|
||||
class Triples(Record):
    # Metadata record shared by the whole batch
    metadata = Metadata()
    # Batch of Triple records
    triples = Array(Triple())
|
||||
|
||||
############################################################################
|
||||
|
||||
# Triples query
|
||||
|
||||
class TriplesQueryRequest(Record):
    # s / p / o Value records -- presumably the subject, predicate and
    # object to match; TODO confirm
    s = Value()
    p = Value()
    o = Value()
    # Integer limit -- presumably the maximum number of results; TODO confirm
    limit = Integer()
    # User and collection names -- presumably scope the query; TODO confirm
    user = String()
    collection = String()
|
||||
|
||||
class TriplesQueryResponse(Record):
    # Error record for the request
    error = Error()
    # Returned Triple records
    triples = Array(Triple())
|
||||
|
||||
6
trustgraph-base/trustgraph/schema/knowledge/__init__.py
Normal file
6
trustgraph-base/trustgraph/schema/knowledge/__init__.py
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
from .graph import *
|
||||
from .document import *
|
||||
from .embeddings import *
|
||||
from .knowledge import *
|
||||
from .nlp import *
|
||||
from .rows import *
|
||||
29
trustgraph-base/trustgraph/schema/knowledge/document.py
Normal file
29
trustgraph-base/trustgraph/schema/knowledge/document.py
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
from pulsar.schema import Record, Bytes
|
||||
|
||||
from ..core.metadata import Metadata
|
||||
from ..core.topic import topic
|
||||
|
||||
############################################################################
|
||||
|
||||
# PDF docs etc.
|
||||
class Document(Record):
    # Metadata record describing the document
    metadata = Metadata()
    # Document content, carried as bytes
    data = Bytes()
|
||||
|
||||
############################################################################
|
||||
|
||||
# Text documents / text from PDF
|
||||
|
||||
class TextDocument(Record):
    # Metadata record describing the document
    metadata = Metadata()
    # Document text; note this is a bytes field, not a string field
    text = Bytes()
|
||||
|
||||
############################################################################
|
||||
|
||||
# Chunks of text
|
||||
|
||||
class Chunk(Record):
    # Metadata record travelling with the chunk
    metadata = Metadata()
    # Chunk content, carried as bytes
    chunk = Bytes()
|
||||
|
||||
############################################################################
|
||||
43
trustgraph-base/trustgraph/schema/knowledge/embeddings.py
Normal file
43
trustgraph-base/trustgraph/schema/knowledge/embeddings.py
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
from pulsar.schema import Record, Bytes, String, Boolean, Integer, Array, Double, Map
|
||||
|
||||
from ..core.metadata import Metadata
|
||||
from ..core.primitives import Value, RowSchema
|
||||
from ..core.topic import topic
|
||||
|
||||
############################################################################
|
||||
|
||||
# Graph embeddings are embeddings associated with a graph entity
|
||||
|
||||
class EntityEmbeddings(Record):
    # The graph entity, as a Value record
    entity = Value()
    # Embedding vectors: an array of arrays of doubles
    vectors = Array(Array(Double()))
|
||||
|
||||
# This is a 'batching' mechanism for the above data
|
||||
class GraphEmbeddings(Record):
    # Metadata record shared by the whole batch
    metadata = Metadata()
    # Batch of per-entity embedding records
    entities = Array(EntityEmbeddings())
|
||||
|
||||
############################################################################
|
||||
|
||||
# Document embeddings are embeddings associated with a chunk
|
||||
|
||||
class ChunkEmbeddings(Record):
    # The chunk the embeddings belong to, as bytes
    chunk = Bytes()
    # Embedding vectors: an array of arrays of doubles
    vectors = Array(Array(Double()))
|
||||
|
||||
# This is a 'batching' mechanism for the above data
|
||||
class DocumentEmbeddings(Record):
    # Metadata record shared by the whole batch
    metadata = Metadata()
    # Batch of per-chunk embedding records
    chunks = Array(ChunkEmbeddings())
|
||||
|
||||
############################################################################
|
||||
|
||||
# Object embeddings are embeddings associated with the primary key of an
|
||||
# object
|
||||
|
||||
class ObjectEmbeddings(Record):
    # Metadata record for the embeddings
    metadata = Metadata()
    # Embedding vectors: an array of arrays of doubles
    vectors = Array(Array(Double()))
    # String name -- presumably the object's type/schema name; TODO confirm
    name = String()
    # String key name -- presumably the primary-key field name; TODO confirm
    key_name = String()
    # String id -- presumably the primary-key value; TODO confirm
    id = String()
|
||||
28
trustgraph-base/trustgraph/schema/knowledge/graph.py
Normal file
28
trustgraph-base/trustgraph/schema/knowledge/graph.py
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
from pulsar.schema import Record, String, Array
|
||||
|
||||
from ..core.primitives import Value, Triple
|
||||
from ..core.metadata import Metadata
|
||||
from ..core.topic import topic
|
||||
|
||||
############################################################################
|
||||
|
||||
# An entity context is an entity associated with textual context
|
||||
|
||||
class EntityContext(Record):
    # The entity, as a Value record
    entity = Value()
    # Textual context associated with the entity
    context = String()
|
||||
|
||||
# This is a 'batching' mechanism for the above data
|
||||
class EntityContexts(Record):
    # Metadata record shared by the whole batch
    metadata = Metadata()
    # Batch of entity/context pairs
    entities = Array(EntityContext())
|
||||
|
||||
############################################################################
|
||||
|
||||
# Graph triples
|
||||
|
||||
class Triples(Record):
    # Metadata record shared by the whole batch
    metadata = Metadata()
    # Batch of Triple records
    triples = Array(Triple())
|
||||
|
||||
############################################################################
|
||||
|
|
@ -1,11 +1,11 @@
|
|||
|
||||
from pulsar.schema import Record, Bytes, String, Array, Long, Boolean
|
||||
from . types import Triple
|
||||
from . topic import topic
|
||||
from . types import Error
|
||||
from . metadata import Metadata
|
||||
from . documents import Document, TextDocument
|
||||
from . graph import Triples, GraphEmbeddings
|
||||
from ..core.primitives import Triple, Error
|
||||
from ..core.topic import topic
|
||||
from ..core.metadata import Metadata
|
||||
from .document import Document, TextDocument
|
||||
from .graph import Triples
|
||||
from .embeddings import GraphEmbeddings
|
||||
|
||||
# get-kg-core
|
||||
# -> (???)
|
||||
26
trustgraph-base/trustgraph/schema/knowledge/nlp.py
Normal file
26
trustgraph-base/trustgraph/schema/knowledge/nlp.py
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
from pulsar.schema import Record, String, Boolean
|
||||
|
||||
from ..core.topic import topic
|
||||
|
||||
############################################################################
|
||||
|
||||
# NLP extraction data types
|
||||
|
||||
class Definition(Record):
    # Name of the thing being defined
    name = String()
    # Definition text for that name
    definition = String()
|
||||
|
||||
class Topic(Record):
    # Topic name
    name = String()
    # Definition text for the topic
    definition = String()
|
||||
|
||||
class Relationship(Record):
    # s / p / o strings -- presumably subject, predicate and object;
    # TODO confirm
    s = String()
    p = String()
    o = String()
    # Boolean flag -- presumably marks whether `o` is an entity rather
    # than a literal; TODO confirm
    o_entity = Boolean()
|
||||
|
||||
class Fact(Record):
    # s / p / o strings -- presumably subject, predicate and object;
    # TODO confirm
    s = String()
    p = String()
    o = String()
|
||||
16
trustgraph-base/trustgraph/schema/knowledge/rows.py
Normal file
16
trustgraph-base/trustgraph/schema/knowledge/rows.py
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
from pulsar.schema import Record, Array, Map, String
|
||||
|
||||
from ..core.metadata import Metadata
|
||||
from ..core.primitives import RowSchema
|
||||
from ..core.topic import topic
|
||||
|
||||
############################################################################
|
||||
|
||||
# Stores rows of information
|
||||
|
||||
class Rows(Record):
    # Metadata record for the batch of rows
    metadata = Metadata()
    # RowSchema record -- presumably describes the columns of `rows`;
    # TODO confirm
    row_schema = RowSchema()
    # The rows: an array of maps with string values
    rows = Array(Map(String()))
|
||||
|
||||
############################################################################
|
||||
|
|
@ -1,31 +0,0 @@
|
|||
|
||||
from pulsar.schema import Record, Bytes, String, Boolean, Integer, Array
|
||||
from pulsar.schema import Double, Map
|
||||
|
||||
from . metadata import Metadata
|
||||
from . types import Value, RowSchema
|
||||
from . topic import topic
|
||||
|
||||
############################################################################
|
||||
|
||||
# Object embeddings are embeddings associated with the primary key of an
|
||||
# object
|
||||
|
||||
class ObjectEmbeddings(Record):
    # Metadata record for the embeddings
    metadata = Metadata()
    # Embedding vectors: an array of arrays of doubles
    vectors = Array(Array(Double()))
    # String name -- presumably the object's type/schema name; TODO confirm
    name = String()
    # String key name -- presumably the primary-key field name; TODO confirm
    key_name = String()
    # String id -- presumably the primary-key value; TODO confirm
    id = String()
|
||||
|
||||
############################################################################
|
||||
|
||||
# Stores rows of information
|
||||
|
||||
class Rows(Record):
    # Metadata record for the batch of rows
    metadata = Metadata()
    # RowSchema record -- presumably describes the columns of `rows`;
    # TODO confirm
    row_schema = RowSchema()
    # The rows: an array of maps with string values
    rows = Array(Map(String()))
|
||||
|
||||
|
||||
|
||||
9
trustgraph-base/trustgraph/schema/services/__init__.py
Normal file
9
trustgraph-base/trustgraph/schema/services/__init__.py
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
from .llm import *
|
||||
from .retrieval import *
|
||||
from .query import *
|
||||
from .agent import *
|
||||
from .flow import *
|
||||
from .prompt import *
|
||||
from .config import *
|
||||
from .library import *
|
||||
from .lookup import *
|
||||
|
|
@ -1,8 +1,8 @@
|
|||
|
||||
from pulsar.schema import Record, String, Array, Map
|
||||
|
||||
from . topic import topic
|
||||
from . types import Error
|
||||
from ..core.topic import topic
|
||||
from ..core.primitives import Error
|
||||
|
||||
############################################################################
|
||||
|
||||
|
|
@ -1,8 +1,8 @@
|
|||
|
||||
from pulsar.schema import Record, Bytes, String, Boolean, Array, Map, Integer
|
||||
|
||||
from . topic import topic
|
||||
from . types import Error
|
||||
from ..core.topic import topic
|
||||
from ..core.primitives import Error
|
||||
|
||||
############################################################################
|
||||
|
||||
|
|
@ -1,8 +1,8 @@
|
|||
|
||||
from pulsar.schema import Record, Bytes, String, Boolean, Array, Map, Integer
|
||||
|
||||
from . topic import topic
|
||||
from . types import Error
|
||||
from ..core.topic import topic
|
||||
from ..core.primitives import Error
|
||||
|
||||
############################################################################
|
||||
|
||||
|
|
@ -1,10 +1,9 @@
|
|||
|
||||
from pulsar.schema import Record, Bytes, String, Array, Long
|
||||
from . types import Triple
|
||||
from . topic import topic
|
||||
from . types import Error
|
||||
from . metadata import Metadata
|
||||
from . documents import Document, TextDocument
|
||||
from ..core.primitives import Triple, Error
|
||||
from ..core.topic import topic
|
||||
from ..core.metadata import Metadata
|
||||
from ..knowledge.document import Document, TextDocument
|
||||
|
||||
# add-document
|
||||
# -> (document_id, document_metadata, content)
|
||||
|
|
@ -1,8 +1,8 @@
|
|||
|
||||
from pulsar.schema import Record, String, Array, Double, Integer
|
||||
|
||||
from . topic import topic
|
||||
from . types import Error
|
||||
from ..core.topic import topic
|
||||
from ..core.primitives import Error
|
||||
|
||||
############################################################################
|
||||
|
||||
|
|
@ -1,9 +1,9 @@
|
|||
|
||||
from pulsar.schema import Record, String
|
||||
|
||||
from . types import Error, Value, Triple
|
||||
from . topic import topic
|
||||
from . metadata import Metadata
|
||||
from ..core.primitives import Error, Value, Triple
|
||||
from ..core.topic import topic
|
||||
from ..core.metadata import Metadata
|
||||
|
||||
############################################################################
|
||||
|
||||
|
|
@ -1,32 +1,12 @@
|
|||
from pulsar.schema import Record, String, Map
|
||||
|
||||
from pulsar.schema import Record, Bytes, String, Boolean, Array, Map, Integer
|
||||
|
||||
from . topic import topic
|
||||
from . types import Error, RowSchema
|
||||
from ..core.primitives import Error
|
||||
from ..core.topic import topic
|
||||
|
||||
############################################################################
|
||||
|
||||
# Prompt services, abstract the prompt generation
|
||||
|
||||
class Definition(Record):
    # Name of the thing being defined
    name = String()
    # Definition text for that name
    definition = String()
|
||||
|
||||
class Topic(Record):
    # Topic name
    name = String()
    # Definition text for the topic
    definition = String()
|
||||
|
||||
class Relationship(Record):
    # s / p / o strings -- presumably subject, predicate and object;
    # TODO confirm
    s = String()
    p = String()
    o = String()
    # Boolean flag -- presumably marks whether `o` is an entity rather
    # than a literal; TODO confirm
    o_entity = Boolean()
|
||||
|
||||
class Fact(Record):
    # s / p / o strings -- presumably subject, predicate and object;
    # TODO confirm
    s = String()
    p = String()
    o = String()
|
||||
|
||||
# extract-definitions:
|
||||
# chunk -> definitions
|
||||
# extract-relationships:
|
||||
|
|
@ -55,5 +35,4 @@ class PromptResponse(Record):
|
|||
# JSON encoded
|
||||
object = String()
|
||||
|
||||
############################################################################
|
||||
|
||||
############################################################################
|
||||
48
trustgraph-base/trustgraph/schema/services/query.py
Normal file
48
trustgraph-base/trustgraph/schema/services/query.py
Normal file
|
|
@ -0,0 +1,48 @@
|
|||
from pulsar.schema import Record, String, Integer, Array, Double
|
||||
|
||||
from ..core.primitives import Error, Value, Triple
|
||||
from ..core.topic import topic
|
||||
|
||||
############################################################################
|
||||
|
||||
# Graph embeddings query
|
||||
|
||||
class GraphEmbeddingsRequest(Record):
    # Query vectors: an array of arrays of doubles
    vectors = Array(Array(Double()))
    # Integer limit -- presumably the maximum number of results; TODO confirm
    limit = Integer()
    # User and collection names -- presumably scope the query; TODO confirm
    user = String()
    collection = String()
|
||||
|
||||
class GraphEmbeddingsResponse(Record):
    # Error record for the request
    error = Error()
    # Returned entities, each a Value record
    entities = Array(Value())
|
||||
|
||||
############################################################################
|
||||
|
||||
# Graph triples query
|
||||
|
||||
class TriplesQueryRequest(Record):
    # User and collection names -- presumably scope the query; TODO confirm
    user = String()
    collection = String()
    # s / p / o Value records -- presumably the subject, predicate and
    # object to match; TODO confirm
    s = Value()
    p = Value()
    o = Value()
    # Integer limit -- presumably the maximum number of results; TODO confirm
    limit = Integer()
|
||||
|
||||
class TriplesQueryResponse(Record):
    # Error record for the request
    error = Error()
    # Returned Triple records
    triples = Array(Triple())
|
||||
|
||||
############################################################################
|
||||
|
||||
# Doc embeddings query
|
||||
|
||||
class DocumentEmbeddingsRequest(Record):
    # Query vectors: an array of arrays of doubles
    vectors = Array(Array(Double()))
    # Integer limit -- presumably the maximum number of results; TODO confirm
    limit = Integer()
    # User and collection names -- presumably scope the query; TODO confirm
    user = String()
    collection = String()
|
||||
|
||||
class DocumentEmbeddingsResponse(Record):
    # Error record for the request
    error = Error()
    # Returned chunks, each carried as a string
    chunks = Array(String())
|
||||
|
|
@ -1,7 +1,7 @@
|
|||
|
||||
from pulsar.schema import Record, Bytes, String, Boolean, Integer, Array, Double
|
||||
from . topic import topic
|
||||
from . types import Error, Value
|
||||
from ..core.topic import topic
|
||||
from ..core.primitives import Error, Value
|
||||
|
||||
############################################################################
|
||||
|
||||
Loading…
Add table
Add a link
Reference in a new issue