mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-04-25 00:16:23 +02:00
Trustgraph initial code drop
This commit is contained in:
parent
c5f4604a7b
commit
9b5cbbf9ca
94 changed files with 5399 additions and 0 deletions
4
.gitignore
vendored
Normal file
4
.gitignore
vendored
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
*~
__pycache__/
env/
# setuptools writes "<name>.egg-info/" (hyphen); the old "*.egg_info/"
# pattern never matched the generated directory.
*.egg-info/
|
||||
54
Containerfile
Normal file
54
Containerfile
Normal file
|
|
@ -0,0 +1,54 @@
|
|||
|
||||
# ----------------------------------------------------------------------------
# Build an AI container. This does the torch install which is huge, and I
# like to avoid re-doing this.
# ----------------------------------------------------------------------------

FROM docker.io/fedora:40 AS ai

ENV PIP_BREAK_SYSTEM_PACKAGES=1

RUN dnf install -y python3 python3-pip python3-wheel python3-aiohttp \
    python3-rdflib

RUN pip3 install torch --index-url https://download.pytorch.org/whl/cpu

RUN pip3 install anthropic google-cloud-aiplatform langchain langchain-core \
        langchain-huggingface langchain-text-splitters langchain-community \
        pymilvus sentence-transformers transformers huggingface-hub \
        pulsar-client && \
    pip3 cache purge

# ----------------------------------------------------------------------------
# Build a container which contains the built Python package. The build
# creates a bunch of left-over cruft, a separate phase means this is only
# needed to support package build
# ----------------------------------------------------------------------------

FROM ai AS build

# Uppercase ENV for consistency with the other instructions.
ENV PACKAGE_VERSION=0.0.0

COPY setup.py /root/build/
COPY README.md /root/build/
COPY scripts/ /root/build/scripts/
# Destination must be absolute: the original "root/build/trustgraph/" was
# relative to the working directory, so the package source never landed in
# /root/build and the wheel build below would miss it.
COPY trustgraph/ /root/build/trustgraph/

RUN (cd /root/build && pip3 wheel -w /root/wheels --no-deps .)

# ----------------------------------------------------------------------------
# Finally, the target container. Start with base and add the package.
# ----------------------------------------------------------------------------

FROM ai

COPY --from=build /root/wheels /root/wheels

RUN pip3 install /root/wheels/trustgraph-* && \
    pip3 cache purge && \
    rm -rf /root/wheels

WORKDIR /

# Keep the container alive; actual services are started by orchestration.
CMD sleep 1000000
|
||||
|
||||
202
LICENSE
Normal file
202
LICENSE
Normal file
|
|
@ -0,0 +1,202 @@
|
|||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
30
Makefile
Normal file
30
Makefile
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
|
||||
# VERSION=$(shell git describe | sed 's/^v//')
VERSION=0.1.16

all: container

CONTAINER=docker.io/trustgraph/trustgraph-flow

# Default container name for start/stop; ?= so an environment or
# command-line NAME still wins. Previously NAME was undefined and
# "make start" / "make stop" failed unless the caller exported it.
NAME?=trustgraph-flow

container:
	podman build -f Containerfile -t ${CONTAINER}:${VERSION} \
	    --format docker

push:
	podman push ${CONTAINER}:${VERSION}

start:
	podman run -i -t --name ${NAME} \
	    -p 8081:8081 \
	    -v $$(pwd)/keys:/keys \
	    -v $$(pwd)/configs:/configs \
	    ${CONTAINER}:${VERSION}

stop:
	podman rm -f ${NAME}

clean:
	rm -rf wheels/

# sed -i 's/0.1.15/0.1.16/' docker-compose*.yaml
|
||||
BIN
architecture.png
Normal file
BIN
architecture.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 77 KiB |
598
architecture.svg
Normal file
598
architecture.svg
Normal file
|
|
@ -0,0 +1,598 @@
|
|||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<!-- Created with Inkscape (http://www.inkscape.org/) -->
|
||||
|
||||
<svg
|
||||
width="244mm"
|
||||
height="180mm"
|
||||
viewBox="0 0 244 180"
|
||||
version="1.1"
|
||||
id="svg1"
|
||||
inkscape:version="1.3.2 (091e20ef0f, 2023-11-25)"
|
||||
sodipodi:docname="architecture.svg"
|
||||
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
|
||||
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
xmlns:svg="http://www.w3.org/2000/svg">
|
||||
<sodipodi:namedview
|
||||
id="namedview1"
|
||||
pagecolor="#ffffff"
|
||||
bordercolor="#666666"
|
||||
borderopacity="1.0"
|
||||
inkscape:showpageshadow="2"
|
||||
inkscape:pageopacity="0.0"
|
||||
inkscape:pagecheckerboard="0"
|
||||
inkscape:deskcolor="#d1d1d1"
|
||||
inkscape:document-units="mm"
|
||||
inkscape:zoom="1.2649603"
|
||||
inkscape:cx="560.88716"
|
||||
inkscape:cy="333.6073"
|
||||
inkscape:window-width="2197"
|
||||
inkscape:window-height="1210"
|
||||
inkscape:window-x="26"
|
||||
inkscape:window-y="23"
|
||||
inkscape:window-maximized="0"
|
||||
inkscape:current-layer="layer1" />
|
||||
<defs
|
||||
id="defs1">
|
||||
<marker
|
||||
style="overflow:visible"
|
||||
id="ArrowWideHeavy"
|
||||
refX="0.80000001"
|
||||
refY="0"
|
||||
orient="auto-start-reverse"
|
||||
inkscape:stockid="Wide, heavy arrow"
|
||||
markerWidth="1"
|
||||
markerHeight="1"
|
||||
viewBox="0 0 1 1"
|
||||
inkscape:isstock="true"
|
||||
inkscape:collect="always"
|
||||
preserveAspectRatio="xMidYMid">
|
||||
<path
|
||||
style="fill:context-stroke;fill-rule:evenodd;stroke:none"
|
||||
d="m 1,0 -3,3 h -2 l 3,-3 -3,-3 h 2 z"
|
||||
id="path3" />
|
||||
</marker>
|
||||
<marker
|
||||
style="overflow:visible"
|
||||
id="ArrowWide"
|
||||
refX="0.80000001"
|
||||
refY="0"
|
||||
orient="auto-start-reverse"
|
||||
inkscape:stockid="Wide arrow"
|
||||
markerWidth="1"
|
||||
markerHeight="1"
|
||||
viewBox="0 0 1 1"
|
||||
inkscape:isstock="true"
|
||||
inkscape:collect="always"
|
||||
preserveAspectRatio="xMidYMid">
|
||||
<path
|
||||
style="fill:none;stroke:context-stroke;stroke-width:1;stroke-linecap:butt"
|
||||
d="M 3,-3 0,0 3,3"
|
||||
transform="rotate(180,0.125,0)"
|
||||
sodipodi:nodetypes="ccc"
|
||||
id="path1" />
|
||||
</marker>
|
||||
</defs>
|
||||
<g
|
||||
inkscape:label="Layer 1"
|
||||
inkscape:groupmode="layer"
|
||||
id="layer1">
|
||||
<g
|
||||
id="g21"
|
||||
transform="translate(-27.330175,0.07547337)">
|
||||
<rect
|
||||
style="fill:#aad8ff;fill-opacity:1;stroke:#068dff;stroke-width:1;stroke-dasharray:none;stroke-opacity:1"
|
||||
id="rect1-5"
|
||||
width="22.364935"
|
||||
height="13.330354"
|
||||
x="33.201935"
|
||||
y="62.749554"
|
||||
ry="3.4270811"
|
||||
rx="3.4270811" />
|
||||
<text
|
||||
xml:space="preserve"
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:5.64444px;font-family:'Lamebrain (BRK)';-inkscape-font-specification:'Lamebrain (BRK), Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;fill:#3c82ca;stroke-width:0.264999;stop-color:#000000"
|
||||
x="37.424816"
|
||||
y="71.271751"
|
||||
id="text1"><tspan
|
||||
sodipodi:role="line"
|
||||
id="tspan1"
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:5.64444px;font-family:'Lamebrain (BRK)';-inkscape-font-specification:'Lamebrain (BRK), Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;stroke-width:0.265"
|
||||
x="37.424816"
|
||||
y="71.271751">loader</tspan></text>
|
||||
</g>
|
||||
<rect
|
||||
style="fill:none;fill-opacity:1;stroke:#068dff;stroke-width:1;stroke-dasharray:none;stroke-opacity:1"
|
||||
id="rect1-5-8-9"
|
||||
width="183.35152"
|
||||
height="72.800163"
|
||||
x="43.44128"
|
||||
y="40.571789"
|
||||
ry="3.4270816"
|
||||
rx="3.4270811" />
|
||||
<g
|
||||
id="g9"
|
||||
transform="translate(-40.653793,-21.359345)">
|
||||
<rect
|
||||
style="fill:#aad8ff;fill-opacity:1;stroke:#068dff;stroke-width:1;stroke-dasharray:none;stroke-opacity:1"
|
||||
id="rect1-5-8"
|
||||
width="22.364935"
|
||||
height="13.330354"
|
||||
x="93.11467"
|
||||
y="84.184372"
|
||||
ry="3.4270811"
|
||||
rx="3.4270811" />
|
||||
<text
|
||||
xml:space="preserve"
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:5.64444px;line-height:0.7;font-family:'Lamebrain (BRK)';-inkscape-font-specification:'Lamebrain (BRK), Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;text-align:center;text-anchor:middle;fill:#3c82ca;stroke-width:0.264999;stop-color:#000000"
|
||||
x="104.29943"
|
||||
y="90.382828"
|
||||
id="text1-7-3"><tspan
|
||||
sodipodi:role="line"
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:4.58611px;line-height:0.7;font-family:'Lamebrain (BRK)';-inkscape-font-specification:'Lamebrain (BRK), Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;text-align:center;text-anchor:middle;stroke-width:0.265"
|
||||
x="104.29943"
|
||||
y="90.382828"
|
||||
id="tspan4-4">pdf</tspan><tspan
|
||||
sodipodi:role="line"
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:4.58611px;line-height:0.7;font-family:'Lamebrain (BRK)';-inkscape-font-specification:'Lamebrain (BRK), Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;text-align:center;text-anchor:middle;stroke-width:0.265"
|
||||
x="104.29943"
|
||||
y="94.333939"
|
||||
id="tspan5">decoder</tspan></text>
|
||||
</g>
|
||||
<g
|
||||
id="g10"
|
||||
transform="translate(-22.520217,-41.429375)">
|
||||
<rect
|
||||
style="fill:#aad8ff;fill-opacity:1;stroke:#068dff;stroke-width:1;stroke-dasharray:none;stroke-opacity:1"
|
||||
id="rect1-5-8-1"
|
||||
width="22.364935"
|
||||
height="13.330354"
|
||||
x="107.27222"
|
||||
y="104.2544"
|
||||
ry="3.4270811"
|
||||
rx="3.4270811" />
|
||||
<text
|
||||
xml:space="preserve"
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:5.64444px;line-height:0.7;font-family:'Lamebrain (BRK)';-inkscape-font-specification:'Lamebrain (BRK), Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;text-align:center;text-anchor:middle;fill:#3c82ca;stroke-width:0.264999;stop-color:#000000"
|
||||
x="118.45468"
|
||||
y="112.51096"
|
||||
id="text1-7-3-0"><tspan
|
||||
sodipodi:role="line"
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:4.58611px;line-height:0.7;font-family:'Lamebrain (BRK)';-inkscape-font-specification:'Lamebrain (BRK), Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;text-align:center;text-anchor:middle;stroke-width:0.265"
|
||||
x="118.45468"
|
||||
y="112.51096"
|
||||
id="tspan5-8">chunker</tspan></text>
|
||||
</g>
|
||||
<g
|
||||
id="g11"
|
||||
transform="translate(-14.347455,-23.319298)">
|
||||
<rect
|
||||
style="fill:#aad8ff;fill-opacity:1;stroke:#068dff;stroke-width:1;stroke-dasharray:none;stroke-opacity:1"
|
||||
id="rect1-5-8-1-0"
|
||||
width="22.364935"
|
||||
height="13.330354"
|
||||
x="131.39058"
|
||||
y="86.144325"
|
||||
ry="3.4270811"
|
||||
rx="3.4270811" />
|
||||
<text
|
||||
xml:space="preserve"
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:5.64444px;line-height:0.7;font-family:'Lamebrain (BRK)';-inkscape-font-specification:'Lamebrain (BRK), Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;text-align:center;text-anchor:middle;fill:#3c82ca;stroke-width:0.264999;stop-color:#000000"
|
||||
x="142.57304"
|
||||
y="94.205971"
|
||||
id="text1-7-3-0-6"><tspan
|
||||
sodipodi:role="line"
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:4.58611px;line-height:0.7;font-family:'Lamebrain (BRK)';-inkscape-font-specification:'Lamebrain (BRK), Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;text-align:center;text-anchor:middle;stroke-width:0.265"
|
||||
x="142.57304"
|
||||
y="94.205971"
|
||||
id="tspan5-8-6">vectorizer</tspan></text>
|
||||
</g>
|
||||
<g
|
||||
id="g12"
|
||||
transform="translate(7.0175781,-43.772896)">
|
||||
<rect
|
||||
style="fill:#aad8ff;fill-opacity:1;stroke:#068dff;stroke-width:1;stroke-dasharray:none;stroke-opacity:1"
|
||||
id="rect1-5-8-1-0-8"
|
||||
width="22.364935"
|
||||
height="13.330354"
|
||||
x="142.31667"
|
||||
y="106.59792"
|
||||
ry="3.4270811"
|
||||
rx="3.4270811" />
|
||||
<text
|
||||
xml:space="preserve"
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:5.64444px;line-height:0.7;font-family:'Lamebrain (BRK)';-inkscape-font-specification:'Lamebrain (BRK), Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;text-align:center;text-anchor:middle;fill:#3c82ca;stroke-width:0.264999;stop-color:#000000"
|
||||
x="153.49913"
|
||||
y="112.87893"
|
||||
id="text1-7-3-0-6-5"><tspan
|
||||
sodipodi:role="line"
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:4.58611px;line-height:0.7;font-family:'Lamebrain (BRK)';-inkscape-font-specification:'Lamebrain (BRK), Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;text-align:center;text-anchor:middle;stroke-width:0.265"
|
||||
x="153.49913"
|
||||
y="112.87893"
|
||||
id="tspan5-8-6-4">kg</tspan><tspan
|
||||
sodipodi:role="line"
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:4.58611px;line-height:0.7;font-family:'Lamebrain (BRK)';-inkscape-font-specification:'Lamebrain (BRK), Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;text-align:center;text-anchor:middle;stroke-width:0.265"
|
||||
x="153.49913"
|
||||
y="116.83004"
|
||||
id="tspan6">extractor</tspan></text>
|
||||
</g>
|
||||
<g
|
||||
id="g13"
|
||||
transform="translate(4.0865732,-36.403481)">
|
||||
<rect
|
||||
style="fill:#aad8ff;fill-opacity:1;stroke:#068dff;stroke-width:1;stroke-dasharray:none;stroke-opacity:1"
|
||||
id="rect1-5-8-1-0-8-8"
|
||||
width="22.364935"
|
||||
height="13.330354"
|
||||
x="145.28687"
|
||||
y="129.16028"
|
||||
ry="3.4270811"
|
||||
rx="3.4270811" />
|
||||
<text
|
||||
xml:space="preserve"
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:5.64444px;line-height:0.7;font-family:'Lamebrain (BRK)';-inkscape-font-specification:'Lamebrain (BRK), Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;text-align:center;text-anchor:middle;fill:#3c82ca;stroke-width:0.264999;stop-color:#000000"
|
||||
x="156.47162"
|
||||
y="135.19592"
|
||||
id="text1-7-3-0-6-5-5"><tspan
|
||||
sodipodi:role="line"
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:4.58611px;line-height:0.7;font-family:'Lamebrain (BRK)';-inkscape-font-specification:'Lamebrain (BRK), Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;text-align:center;text-anchor:middle;stroke-width:0.265"
|
||||
x="156.47162"
|
||||
y="135.19592"
|
||||
id="tspan6-8">llm</tspan><tspan
|
||||
sodipodi:role="line"
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:4.58611px;line-height:0.7;font-family:'Lamebrain (BRK)';-inkscape-font-specification:'Lamebrain (BRK), Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;text-align:center;text-anchor:middle;stroke-width:0.265"
|
||||
x="156.47162"
|
||||
y="139.14703"
|
||||
id="tspan7">ollama</tspan></text>
|
||||
</g>
|
||||
<g
|
||||
id="g14"
|
||||
transform="translate(6.0435632,-37.875265)">
|
||||
<rect
|
||||
style="fill:#aad8ff;fill-opacity:1;stroke:#068dff;stroke-width:1;stroke-dasharray:none;stroke-opacity:1"
|
||||
id="rect1-5-8-1-0-8-8-0"
|
||||
width="22.364935"
|
||||
height="13.330354"
|
||||
x="173.83203"
|
||||
y="88.480156"
|
||||
ry="3.4270811"
|
||||
rx="3.4270811" />
|
||||
<text
|
||||
xml:space="preserve"
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:5.64444px;line-height:0.7;font-family:'Lamebrain (BRK)';-inkscape-font-specification:'Lamebrain (BRK), Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;text-align:center;text-anchor:middle;fill:#3c82ca;stroke-width:0.264999;stop-color:#000000"
|
||||
x="185.01678"
|
||||
y="94.641922"
|
||||
id="text1-7-3-0-6-5-5-7"><tspan
|
||||
sodipodi:role="line"
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:4.58611px;line-height:0.7;font-family:'Lamebrain (BRK)';-inkscape-font-specification:'Lamebrain (BRK), Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;text-align:center;text-anchor:middle;stroke-width:0.265"
|
||||
x="185.01678"
|
||||
y="94.641922"
|
||||
id="tspan7-0">graph</tspan><tspan
|
||||
sodipodi:role="line"
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:4.58611px;line-height:0.7;font-family:'Lamebrain (BRK)';-inkscape-font-specification:'Lamebrain (BRK), Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;text-align:center;text-anchor:middle;stroke-width:0.265"
|
||||
x="185.01678"
|
||||
y="98.593033"
|
||||
id="tspan8">write</tspan></text>
|
||||
</g>
|
||||
<g
|
||||
id="g15"
|
||||
transform="translate(-15.314501,-39.11037)">
|
||||
<rect
|
||||
style="fill:#aad8ff;fill-opacity:1;stroke:#068dff;stroke-width:1;stroke-dasharray:none;stroke-opacity:1"
|
||||
id="rect1-5-8-1-0-8-8-0-2"
|
||||
width="22.364935"
|
||||
height="13.330354"
|
||||
x="195.20018"
|
||||
y="114.15553"
|
||||
ry="3.4270811"
|
||||
rx="3.4270811" />
|
||||
<text
|
||||
xml:space="preserve"
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:5.64444px;line-height:0.7;font-family:'Lamebrain (BRK)';-inkscape-font-specification:'Lamebrain (BRK), Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;text-align:center;text-anchor:middle;fill:#3c82ca;stroke-width:0.264999;stop-color:#000000"
|
||||
x="206.38264"
|
||||
y="120.24162"
|
||||
id="text1-7-3-0-6-5-5-7-7"><tspan
|
||||
sodipodi:role="line"
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:4.58611px;line-height:0.7;font-family:'Lamebrain (BRK)';-inkscape-font-specification:'Lamebrain (BRK), Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;text-align:center;text-anchor:middle;stroke-width:0.265"
|
||||
x="206.38264"
|
||||
y="120.24162"
|
||||
id="tspan8-7">vector</tspan><tspan
|
||||
sodipodi:role="line"
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:4.58611px;line-height:0.7;font-family:'Lamebrain (BRK)';-inkscape-font-specification:'Lamebrain (BRK), Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;text-align:center;text-anchor:middle;stroke-width:0.265"
|
||||
x="206.38264"
|
||||
y="124.19273"
|
||||
id="tspan9">store</tspan></text>
|
||||
</g>
|
||||
<g
|
||||
id="g22"
|
||||
transform="translate(-107.63453,14.622921)">
|
||||
<rect
|
||||
style="fill:#b3e5ce;fill-opacity:1;stroke:#3aa875;stroke-width:1;stroke-dasharray:none;stroke-opacity:1"
|
||||
id="rect1-5-8-6"
|
||||
width="22.364935"
|
||||
height="13.330354"
|
||||
x="140.86087"
|
||||
y="21.397438"
|
||||
ry="3.4270811"
|
||||
rx="3.4270811" />
|
||||
<text
|
||||
xml:space="preserve"
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:5.64444px;font-family:'Lamebrain (BRK)';-inkscape-font-specification:'Lamebrain (BRK), Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;fill:#3aa875;fill-opacity:1;stroke-width:0.264999;stop-color:#000000"
|
||||
x="146.06114"
|
||||
y="29.816963"
|
||||
id="text1-7-5"><tspan
|
||||
sodipodi:role="line"
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:5.64444px;font-family:'Lamebrain (BRK)';-inkscape-font-specification:'Lamebrain (BRK), Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;fill:#3aa875;fill-opacity:1;stroke-width:0.265"
|
||||
x="146.06114"
|
||||
y="29.816963"
|
||||
id="tspan2">Pulsar</tspan></text>
|
||||
</g>
|
||||
<g
|
||||
id="g25"
|
||||
transform="translate(-36.373415,-1.3827656)">
|
||||
<rect
|
||||
style="fill:#b3e5ce;fill-opacity:1;stroke:#3aa875;stroke-width:1;stroke-dasharray:none;stroke-opacity:1"
|
||||
id="rect1-5-8-6-3"
|
||||
width="22.364935"
|
||||
height="13.330354"
|
||||
x="185.74686"
|
||||
y="129.80525"
|
||||
ry="3.4270811"
|
||||
rx="3.4270811" />
|
||||
<text
|
||||
xml:space="preserve"
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:5.64444px;font-family:'Lamebrain (BRK)';-inkscape-font-specification:'Lamebrain (BRK), Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;fill:#3aa875;fill-opacity:1;stroke-width:0.264999;stop-color:#000000"
|
||||
x="189.59438"
|
||||
y="138.33592"
|
||||
id="text1-7-5-7"><tspan
|
||||
sodipodi:role="line"
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:5.64444px;font-family:'Lamebrain (BRK)';-inkscape-font-specification:'Lamebrain (BRK), Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;fill:#3aa875;fill-opacity:1;stroke-width:0.265"
|
||||
x="189.59438"
|
||||
y="138.33592"
|
||||
id="tspan2-3">Ollama</tspan></text>
|
||||
</g>
|
||||
<g
|
||||
id="g24"
|
||||
transform="translate(-100.09946,14.598037)">
|
||||
<rect
|
||||
style="fill:#b3e5ce;fill-opacity:1;stroke:#3aa875;stroke-width:1;stroke-dasharray:none;stroke-opacity:1"
|
||||
id="rect1-5-8-6-3-7"
|
||||
width="22.364935"
|
||||
height="13.330354"
|
||||
x="249.47292"
|
||||
y="141.78751"
|
||||
ry="3.4270811"
|
||||
rx="3.4270811" />
|
||||
<text
|
||||
xml:space="preserve"
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:4.58611px;line-height:0.8;font-family:'Lamebrain (BRK)';-inkscape-font-specification:'Lamebrain (BRK), Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;fill:#3aa875;fill-opacity:1;stroke-width:0.264999;stop-color:#000000"
|
||||
x="260.65308"
|
||||
y="148.15459"
|
||||
id="text1-7-5-7-8"><tspan
|
||||
sodipodi:role="line"
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:4.58611px;line-height:0.8;font-family:'Lamebrain (BRK)';-inkscape-font-specification:'Lamebrain (BRK), Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;text-align:center;text-anchor:middle;fill:#3aa875;fill-opacity:1;stroke-width:0.265"
|
||||
x="260.65308"
|
||||
y="148.15459"
|
||||
id="tspan2-3-9">GPU</tspan><tspan
|
||||
sodipodi:role="line"
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:4.58611px;line-height:0.8;font-family:'Lamebrain (BRK)';-inkscape-font-specification:'Lamebrain (BRK), Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;text-align:center;text-anchor:middle;fill:#3aa875;fill-opacity:1;stroke-width:0.265"
|
||||
x="260.65308"
|
||||
y="151.82347"
|
||||
id="tspan23">hardware</tspan></text>
|
||||
</g>
|
||||
<g
|
||||
id="g26"
|
||||
transform="translate(49.038967,-8.6910853)">
|
||||
<rect
|
||||
style="fill:#b3e5ce;fill-opacity:1;stroke:#3aa875;stroke-width:1;stroke-dasharray:none;stroke-opacity:1"
|
||||
id="rect1-5-8-6-3-1"
|
||||
width="22.364935"
|
||||
height="13.330354"
|
||||
x="130.84671"
|
||||
y="136.95265"
|
||||
ry="3.4270811"
|
||||
rx="3.4270811" />
|
||||
<text
|
||||
xml:space="preserve"
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:5.64444px;font-family:'Lamebrain (BRK)';-inkscape-font-specification:'Lamebrain (BRK), Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;fill:#3aa875;fill-opacity:1;stroke-width:0.264999;stop-color:#000000"
|
||||
x="135.40543"
|
||||
y="145.67805"
|
||||
id="text1-7-5-7-7"><tspan
|
||||
sodipodi:role="line"
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:5.64444px;font-family:'Lamebrain (BRK)';-inkscape-font-specification:'Lamebrain (BRK), Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;fill:#3aa875;fill-opacity:1;stroke-width:0.265"
|
||||
x="135.40543"
|
||||
y="145.67805"
|
||||
id="tspan2-3-6">Milvus</tspan></text>
|
||||
</g>
|
||||
<g
|
||||
id="g23"
|
||||
transform="translate(-9.0736296,-143.94078)">
|
||||
<rect
|
||||
style="fill:#b3e5ce;fill-opacity:1;stroke:#3aa875;stroke-width:0.999999;stroke-dasharray:none;stroke-opacity:1"
|
||||
id="rect1-5-8-6-3-1-7"
|
||||
width="33.609211"
|
||||
height="13.330354"
|
||||
x="183.32709"
|
||||
y="157.13333"
|
||||
ry="3.4270811"
|
||||
rx="3.4270809" />
|
||||
<text
|
||||
xml:space="preserve"
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:5.64444px;font-family:'Lamebrain (BRK)';-inkscape-font-specification:'Lamebrain (BRK), Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;fill:#3aa875;fill-opacity:1;stroke-width:0.264999;stop-color:#000000"
|
||||
x="188.30377"
|
||||
y="165.74301"
|
||||
id="text1-7-5-7-7-7"><tspan
|
||||
sodipodi:role="line"
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:5.64444px;font-family:'Lamebrain (BRK)';-inkscape-font-specification:'Lamebrain (BRK), Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;fill:#3aa875;fill-opacity:1;stroke-width:0.265"
|
||||
x="188.30377"
|
||||
y="165.74301"
|
||||
id="tspan2-3-6-3">Cassandra</tspan></text>
|
||||
</g>
|
||||
<path
|
||||
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.5;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;marker-end:url(#ArrowWide)"
|
||||
d="m 74.825812,69.490204 9.926188,0"
|
||||
id="path16"
|
||||
inkscape:connector-type="polyline"
|
||||
inkscape:connector-curvature="0"
|
||||
inkscape:connection-start="#g9"
|
||||
inkscape:connection-end="#g10" />
|
||||
<path
|
||||
style="font-variation-settings:normal;opacity:1;vector-effect:none;fill:none;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:0.5;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;-inkscape-stroke:none;marker-end:url(#ArrowWide);stop-color:#000000;stop-opacity:1"
|
||||
d="m 107.11693,69.490204 9.92619,0"
|
||||
id="path17"
|
||||
inkscape:connector-type="polyline"
|
||||
inkscape:connector-curvature="0"
|
||||
inkscape:connection-start="#g10"
|
||||
inkscape:connection-end="#g11" />
|
||||
<path
|
||||
style="font-variation-settings:normal;opacity:1;vector-effect:none;fill:none;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:0.5;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;-inkscape-stroke:none;marker-end:url(#ArrowWide);stop-color:#000000;stop-opacity:1"
|
||||
d="m 139.40806,69.490204 9.92618,0"
|
||||
id="path18"
|
||||
inkscape:connector-type="polyline"
|
||||
inkscape:connector-curvature="0"
|
||||
inkscape:connection-start="#g11"
|
||||
inkscape:connection-end="#g12" />
|
||||
<path
|
||||
style="font-variation-settings:normal;opacity:1;vector-effect:none;fill:none;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:0.5;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;-inkscape-stroke:none;marker-end:url(#ArrowWide);stop-color:#000000;stop-opacity:1"
|
||||
d="m 171.50053,65.095384 8.57372,-3.430496"
|
||||
id="path19"
|
||||
inkscape:connector-type="polyline"
|
||||
inkscape:connector-curvature="0"
|
||||
inkscape:connection-start="#g12"
|
||||
inkscape:connection-end="#g14" />
|
||||
<path
|
||||
style="font-variation-settings:normal;opacity:1;vector-effect:none;fill:none;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:0.5;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;-inkscape-stroke:none;marker-end:url(#ArrowWide);stop-color:#000000;stop-opacity:1"
|
||||
d="m 171.50098,73.883754 8.5829,3.433036"
|
||||
id="path20"
|
||||
inkscape:connector-type="polyline"
|
||||
inkscape:connector-curvature="0"
|
||||
inkscape:connection-start="#g12"
|
||||
inkscape:connection-end="#g15" />
|
||||
<path
|
||||
style="font-variation-settings:normal;opacity:1;vector-effect:none;fill:none;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:0.5;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;-inkscape-stroke:none;marker-start:url(#ArrowWideHeavy);marker-end:url(#ArrowWide);stop-color:#000000;stop-opacity:1"
|
||||
d="m 160.52544,76.155381 0.0217,16.601416"
|
||||
id="path21"
|
||||
inkscape:connector-type="polyline"
|
||||
inkscape:connector-curvature="0"
|
||||
inkscape:connection-start="#g12"
|
||||
inkscape:connection-end="#g13" />
|
||||
<path
|
||||
style="font-variation-settings:normal;opacity:1;vector-effect:none;fill:none;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:0.5;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;-inkscape-stroke:none;marker-end:url(#ArrowWide);stop-color:#000000;stop-opacity:1"
|
||||
d="m 28.236695,69.490204 24.224182,0"
|
||||
id="path22"
|
||||
inkscape:connector-type="polyline"
|
||||
inkscape:connector-curvature="0"
|
||||
inkscape:connection-start="#g21"
|
||||
inkscape:connection-end="#g9" />
|
||||
<path
|
||||
style="font-variation-settings:normal;opacity:1;vector-effect:none;fill:none;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:0.5;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;-inkscape-stroke:none;marker-end:url(#ArrowWide);stop-color:#000000;stop-opacity:1"
|
||||
d="m 191.06815,88.375517 -1e-5,39.886053"
|
||||
id="path26"
|
||||
inkscape:connector-type="polyline"
|
||||
inkscape:connector-curvature="0"
|
||||
inkscape:connection-start="#g15"
|
||||
inkscape:connection-end="#g26" />
|
||||
<path
|
||||
style="font-variation-settings:normal;opacity:1;vector-effect:none;fill:none;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:0.5;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;-inkscape-stroke:none;marker-end:url(#ArrowWide);stop-color:#000000;stop-opacity:1"
|
||||
d="m 191.05806,50.604891 0,-24.081986"
|
||||
id="path27"
|
||||
inkscape:connector-type="polyline"
|
||||
inkscape:connector-curvature="0"
|
||||
inkscape:connection-start="#g14"
|
||||
inkscape:connection-end="#g23" />
|
||||
<path
|
||||
style="font-variation-settings:normal;opacity:1;vector-effect:none;fill:none;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:0.5;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;-inkscape-stroke:none;marker-end:url(#ArrowWide);stop-color:#000000;stop-opacity:1"
|
||||
d="m 160.55591,106.08715 0,22.33534"
|
||||
id="path28"
|
||||
inkscape:connector-type="polyline"
|
||||
inkscape:connector-curvature="0"
|
||||
inkscape:connection-start="#g13"
|
||||
inkscape:connection-end="#g25" />
|
||||
<path
|
||||
style="font-variation-settings:normal;opacity:1;vector-effect:none;fill:none;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:0.5;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;-inkscape-stroke:none;marker-end:url(#ArrowWide);stop-color:#000000;stop-opacity:1"
|
||||
d="m 160.55591,141.75284 10e-6,14.6327"
|
||||
id="path29"
|
||||
inkscape:connector-type="polyline"
|
||||
inkscape:connector-curvature="0"
|
||||
inkscape:connection-start="#g25"
|
||||
inkscape:connection-end="#g24" />
|
||||
<g
|
||||
id="g32"
|
||||
transform="translate(134.70625,3.8839786)">
|
||||
<g
|
||||
id="g31">
|
||||
<path
|
||||
id="path30-6"
|
||||
style="font-variation-settings:normal;opacity:1;vector-effect:none;fill:#b3e5ce;fill-opacity:1;fill-rule:evenodd;stroke:#3aa875;stroke-width:0.999999;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;-inkscape-stroke:none;stop-color:#000000;stop-opacity:1"
|
||||
d="m 80.706141,123.03153 v 17.54957 0.24064 h 0.05366 c 0.458094,1.34305 4.610115,2.37096 9.584197,2.37275 4.974205,-0.002 9.126412,-1.02967 9.584518,-2.37275 h 0.05366 v -0.24064 -17.54957 z"
|
||||
sodipodi:nodetypes="cccccccccc" />
|
||||
<ellipse
|
||||
style="font-variation-settings:normal;opacity:1;vector-effect:none;fill:#b3e5ce;fill-opacity:1;fill-rule:evenodd;stroke:#3aa875;stroke-width:0.999999;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;-inkscape-stroke:none;stop-color:#000000;stop-opacity:1"
|
||||
id="path30"
|
||||
cx="90.344154"
|
||||
cy="122.50703"
|
||||
rx="9.6379652"
|
||||
ry="2.6134403" />
|
||||
</g>
|
||||
<text
|
||||
xml:space="preserve"
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:4.58611px;line-height:0.7;font-family:'Lamebrain (BRK)';-inkscape-font-specification:'Lamebrain (BRK), Normal';font-variant-ligatures:normal;font-variant-position:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-alternates:normal;font-variant-east-asian:normal;font-feature-settings:normal;font-variation-settings:normal;text-indent:0;text-align:center;text-decoration-line:none;text-decoration-style:solid;text-decoration-color:#000000;letter-spacing:normal;word-spacing:normal;text-transform:none;writing-mode:lr-tb;direction:ltr;text-orientation:mixed;dominant-baseline:auto;baseline-shift:baseline;text-anchor:middle;white-space:normal;opacity:1;vector-effect:none;fill:#3aa875;fill-opacity:1;stroke-width:0.264999;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;-inkscape-stroke:none;stop-color:#000000;stop-opacity:1"
|
||||
x="90.346451"
|
||||
y="133.7585"
|
||||
id="text1-7-3-0-6-5-5-1-0-3"><tspan
|
||||
sodipodi:role="line"
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:4.58611px;line-height:0.7;font-family:'Lamebrain (BRK)';-inkscape-font-specification:'Lamebrain (BRK), Normal';font-variant-ligatures:normal;font-variant-position:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-alternates:normal;font-variant-east-asian:normal;font-feature-settings:normal;font-variation-settings:normal;text-indent:0;text-align:center;text-decoration-line:none;text-decoration-style:solid;text-decoration-color:#000000;letter-spacing:normal;word-spacing:normal;text-transform:none;writing-mode:lr-tb;direction:ltr;text-orientation:mixed;dominant-baseline:auto;baseline-shift:baseline;text-anchor:middle;white-space:normal;vector-effect:none;fill:#3aa875;fill-opacity:1;stroke-width:0.264999;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;-inkscape-stroke:none;stop-color:#000000;stop-opacity:1"
|
||||
x="90.346451"
|
||||
y="133.7585"
|
||||
id="tspan30-0">Embeds</tspan></text>
|
||||
</g>
|
||||
<g
|
||||
id="g33"
|
||||
transform="translate(-3.1749999)">
|
||||
<path
|
||||
id="path30-6-7"
|
||||
style="font-variation-settings:normal;fill:#b3e5ce;fill-opacity:1;fill-rule:evenodd;stroke:#3aa875;stroke-width:0.999999;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;stop-color:#000000"
|
||||
d="m 143.71962,11.58775 v 17.54957 0.24064 h 0.0537 c 0.4581,1.34305 4.61012,2.37096 9.5842,2.37275 4.9742,-0.002 9.12641,-1.02967 9.58452,-2.37275 h 0.0537 V 29.13732 11.58775 Z"
|
||||
sodipodi:nodetypes="cccccccccc" />
|
||||
<ellipse
|
||||
style="font-variation-settings:normal;fill:#b3e5ce;fill-opacity:1;fill-rule:evenodd;stroke:#3aa875;stroke-width:0.999999;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;stop-color:#000000"
|
||||
id="path30-1"
|
||||
cx="153.35764"
|
||||
cy="11.063255"
|
||||
rx="9.6379652"
|
||||
ry="2.6134403" />
|
||||
<text
|
||||
xml:space="preserve"
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:4.58611px;line-height:0.7;font-family:'Lamebrain (BRK)';-inkscape-font-specification:'Lamebrain (BRK), Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;text-align:center;text-anchor:middle;fill:#3aa875;fill-opacity:1;stroke-width:0.264999;stop-color:#000000"
|
||||
x="153.35764"
|
||||
y="21.702637"
|
||||
id="text1-7-3-0-6-5-5-1-0"><tspan
|
||||
sodipodi:role="line"
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:4.58611px;line-height:0.7;font-family:'Lamebrain (BRK)';-inkscape-font-specification:'Lamebrain (BRK), Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;text-align:center;text-anchor:middle;fill:#3aa875;fill-opacity:1;stroke-width:0.265"
|
||||
x="153.35764"
|
||||
y="21.702637"
|
||||
id="tspan7-9-1">RDF</tspan><tspan
|
||||
sodipodi:role="line"
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:4.58611px;line-height:0.7;font-family:'Lamebrain (BRK)';-inkscape-font-specification:'Lamebrain (BRK), Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;text-align:center;text-anchor:middle;fill:#3aa875;fill-opacity:1;stroke-width:0.265"
|
||||
x="153.35764"
|
||||
y="24.912914"
|
||||
id="tspan30">edges</tspan></text>
|
||||
</g>
|
||||
<path
|
||||
style="font-variation-settings:normal;opacity:1;vector-effect:none;fill:none;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:0.5;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;-inkscape-stroke:none;marker-end:url(#ArrowWide);stop-color:#000000;stop-opacity:1"
|
||||
d="m 202.25061,135.0917 13.16178,0.19415"
|
||||
id="path32"
|
||||
inkscape:connector-type="polyline"
|
||||
inkscape:connector-curvature="0"
|
||||
inkscape:connection-start="#g26"
|
||||
inkscape:connection-end="#g32" />
|
||||
<path
|
||||
style="font-variation-settings:normal;opacity:1;vector-effect:none;fill:none;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:0.5;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;-inkscape-stroke:none;marker-end:url(#ArrowWide);stop-color:#000000;stop-opacity:1"
|
||||
d="m 174.25346,19.957438 -14.43272,0.08564"
|
||||
id="path33"
|
||||
inkscape:connector-type="polyline"
|
||||
inkscape:connector-curvature="0"
|
||||
inkscape:connection-start="#g23"
|
||||
inkscape:connection-end="#g33" />
|
||||
</g>
|
||||
</svg>
|
||||
|
After Width: | Height: | Size: 37 KiB |
172
docker-compose-azure.yaml
Normal file
172
docker-compose-azure.yaml
Normal file
|
|
@ -0,0 +1,172 @@
|
|||
|
||||
volumes:
|
||||
cassandra:
|
||||
pulsar-conf:
|
||||
pulsar-data:
|
||||
etcd:
|
||||
minio-data:
|
||||
milvus:
|
||||
|
||||
services:
|
||||
|
||||
cassandra:
|
||||
image: docker.io/cassandra:4.1.5
|
||||
ports:
|
||||
- "9042:9042"
|
||||
volumes:
|
||||
- "cassandra:/var/lib/cassandra"
|
||||
restart: on-failure:100
|
||||
|
||||
pulsar:
|
||||
image: docker.io/apachepulsar/pulsar:3.3.0
|
||||
command: bin/pulsar standalone
|
||||
ports:
|
||||
- "6650:6650"
|
||||
- "8080:8080"
|
||||
volumes:
|
||||
- "pulsar-conf:/pulsar/conf"
|
||||
- "pulsar-data:/pulsar/data"
|
||||
restart: on-failure:100
|
||||
|
||||
pulsar-manager:
|
||||
image: docker.io/apachepulsar/pulsar-manager:v0.3.0
|
||||
ports:
|
||||
- "9527:9527"
|
||||
- "7750:7750"
|
||||
environment:
|
||||
SPRING_CONFIGURATION_FILE: /pulsar-manager/pulsar-manager/application.properties
|
||||
restart: on-failure:100
|
||||
|
||||
etcd:
|
||||
image: quay.io/coreos/etcd:v3.5.5
|
||||
command:
|
||||
- "etcd"
|
||||
- "-advertise-client-urls=http://127.0.0.1:2379"
|
||||
- "-listen-client-urls"
|
||||
- "http://0.0.0.0:2379"
|
||||
- "--data-dir"
|
||||
- "/etcd"
|
||||
environment:
|
||||
ETCD_AUTO_COMPACTION_MODE: revision
|
||||
ETCD_AUTO_COMPACTION_RETENTION: "1000"
|
||||
ETCD_QUOTA_BACKEND_BYTES: "4294967296"
|
||||
ETCD_SNAPSHOT_COUNT: "50000"
|
||||
ports:
|
||||
- "2379:2379"
|
||||
volumes:
|
||||
- "etcd:/etcd"
|
||||
restart: on-failure:100
|
||||
|
||||
minio:
|
||||
image: docker.io/minio/minio:RELEASE.2024-07-04T14-25-45Z
|
||||
command:
|
||||
- "minio"
|
||||
- "server"
|
||||
- "/minio_data"
|
||||
- "--console-address"
|
||||
- ":9001"
|
||||
environment:
|
||||
MINIO_ROOT_USER: minioadmin
|
||||
MINIO_ROOT_PASSWORD: minioadmin
|
||||
ports:
|
||||
- "9001:9001"
|
||||
volumes:
|
||||
- "minio-data:/minio_data"
|
||||
restart: on-failure:100
|
||||
|
||||
milvus:
|
||||
image: docker.io/milvusdb/milvus:v2.4.5
|
||||
command:
|
||||
- "milvus"
|
||||
- "run"
|
||||
- "standalone"
|
||||
environment:
|
||||
ETCD_ENDPOINTS: etcd:2379
|
||||
MINIO_ADDRESS: minio:9000
|
||||
ports:
|
||||
- "9091:9091"
|
||||
- "19530:19530"
|
||||
volumes:
|
||||
- "milvus:/var/lib/milvus"
|
||||
restart: on-failure:100
|
||||
|
||||
pdf-decoder:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.1.16
|
||||
command:
|
||||
- "pdf-decoder"
|
||||
- "-p"
|
||||
- "pulsar://pulsar:6650"
|
||||
restart: on-failure:100
|
||||
|
||||
chunker:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.1.16
|
||||
command:
|
||||
- "chunker-recursive"
|
||||
- "-p"
|
||||
- "pulsar://pulsar:6650"
|
||||
restart: on-failure:100
|
||||
|
||||
vectorize:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.1.16
|
||||
command:
|
||||
- "embeddings-vectorize"
|
||||
- "-p"
|
||||
- "pulsar://pulsar:6650"
|
||||
restart: on-failure:100
|
||||
|
||||
embeddings:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.1.16
|
||||
command:
|
||||
- "embeddings-hf"
|
||||
- "-p"
|
||||
- "pulsar://pulsar:6650"
|
||||
restart: on-failure:100
|
||||
|
||||
kg-extract-definitions:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.1.16
|
||||
command:
|
||||
- "kg-extract-definitions"
|
||||
- "-p"
|
||||
- "pulsar://pulsar:6650"
|
||||
restart: on-failure:100
|
||||
|
||||
kg-extract-relationships:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.1.16
|
||||
command:
|
||||
- "kg-extract-relationships"
|
||||
- "-p"
|
||||
- "pulsar://pulsar:6650"
|
||||
restart: on-failure:100
|
||||
|
||||
vector-write:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.1.16
|
||||
command:
|
||||
- "vector-write-milvus"
|
||||
- "-p"
|
||||
- "pulsar://pulsar:6650"
|
||||
- "-t"
|
||||
- "http://milvus:19530"
|
||||
restart: on-failure:100
|
||||
|
||||
graph-write:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.1.16
|
||||
command:
|
||||
- "graph-write-cassandra"
|
||||
- "-p"
|
||||
- "pulsar://pulsar:6650"
|
||||
- "-g"
|
||||
- "cassandra"
|
||||
restart: on-failure:100
|
||||
|
||||
llm:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.1.16
|
||||
command:
|
||||
- "llm-azure-text"
|
||||
- "-p"
|
||||
- "pulsar://pulsar:6650"
|
||||
- "-k"
|
||||
- ${AZURE_TOKEN}
|
||||
- "-e"
|
||||
- ${AZURE_ENDPOINT}
|
||||
restart: on-failure:100
|
||||
|
||||
170
docker-compose-claude.yaml
Normal file
170
docker-compose-claude.yaml
Normal file
|
|
@ -0,0 +1,170 @@
|
|||
|
||||
volumes:
|
||||
cassandra:
|
||||
pulsar-conf:
|
||||
pulsar-data:
|
||||
etcd:
|
||||
minio-data:
|
||||
milvus:
|
||||
|
||||
services:
|
||||
|
||||
cassandra:
|
||||
image: docker.io/cassandra:4.1.5
|
||||
ports:
|
||||
- "9042:9042"
|
||||
volumes:
|
||||
- "cassandra:/var/lib/cassandra"
|
||||
restart: on-failure:100
|
||||
|
||||
pulsar:
|
||||
image: docker.io/apachepulsar/pulsar:3.3.0
|
||||
command: bin/pulsar standalone
|
||||
ports:
|
||||
- "6650:6650"
|
||||
- "8080:8080"
|
||||
volumes:
|
||||
- "pulsar-conf:/pulsar/conf"
|
||||
- "pulsar-data:/pulsar/data"
|
||||
restart: on-failure:100
|
||||
|
||||
pulsar-manager:
|
||||
image: docker.io/apachepulsar/pulsar-manager:v0.3.0
|
||||
ports:
|
||||
- "9527:9527"
|
||||
- "7750:7750"
|
||||
environment:
|
||||
SPRING_CONFIGURATION_FILE: /pulsar-manager/pulsar-manager/application.properties
|
||||
restart: on-failure:100
|
||||
|
||||
etcd:
|
||||
image: quay.io/coreos/etcd:v3.5.5
|
||||
command:
|
||||
- "etcd"
|
||||
- "-advertise-client-urls=http://127.0.0.1:2379"
|
||||
- "-listen-client-urls"
|
||||
- "http://0.0.0.0:2379"
|
||||
- "--data-dir"
|
||||
- "/etcd"
|
||||
environment:
|
||||
ETCD_AUTO_COMPACTION_MODE: revision
|
||||
ETCD_AUTO_COMPACTION_RETENTION: "1000"
|
||||
ETCD_QUOTA_BACKEND_BYTES: "4294967296"
|
||||
ETCD_SNAPSHOT_COUNT: "50000"
|
||||
ports:
|
||||
- "2379:2379"
|
||||
volumes:
|
||||
- "etcd:/etcd"
|
||||
restart: on-failure:100
|
||||
|
||||
minio:
|
||||
image: docker.io/minio/minio:RELEASE.2024-07-04T14-25-45Z
|
||||
command:
|
||||
- "minio"
|
||||
- "server"
|
||||
- "/minio_data"
|
||||
- "--console-address"
|
||||
- ":9001"
|
||||
environment:
|
||||
MINIO_ROOT_USER: minioadmin
|
||||
MINIO_ROOT_PASSWORD: minioadmin
|
||||
ports:
|
||||
- "9001:9001"
|
||||
volumes:
|
||||
- "minio-data:/minio_data"
|
||||
restart: on-failure:100
|
||||
|
||||
milvus:
|
||||
image: docker.io/milvusdb/milvus:v2.4.5
|
||||
command:
|
||||
- "milvus"
|
||||
- "run"
|
||||
- "standalone"
|
||||
environment:
|
||||
ETCD_ENDPOINTS: etcd:2379
|
||||
MINIO_ADDRESS: minio:9000
|
||||
ports:
|
||||
- "9091:9091"
|
||||
- "19530:19530"
|
||||
volumes:
|
||||
- "milvus:/var/lib/milvus"
|
||||
restart: on-failure:100
|
||||
|
||||
pdf-decoder:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.1.16
|
||||
command:
|
||||
- "pdf-decoder"
|
||||
- "-p"
|
||||
- "pulsar://pulsar:6650"
|
||||
restart: on-failure:100
|
||||
|
||||
chunker:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.1.16
|
||||
command:
|
||||
- "chunker-recursive"
|
||||
- "-p"
|
||||
- "pulsar://pulsar:6650"
|
||||
restart: on-failure:100
|
||||
|
||||
vectorize:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.1.16
|
||||
command:
|
||||
- "embeddings-vectorize"
|
||||
- "-p"
|
||||
- "pulsar://pulsar:6650"
|
||||
restart: on-failure:100
|
||||
|
||||
embeddings:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.1.16
|
||||
command:
|
||||
- "embeddings-hf"
|
||||
- "-p"
|
||||
- "pulsar://pulsar:6650"
|
||||
restart: on-failure:100
|
||||
|
||||
kg-extract-definitions:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.1.16
|
||||
command:
|
||||
- "kg-extract-definitions"
|
||||
- "-p"
|
||||
- "pulsar://pulsar:6650"
|
||||
restart: on-failure:100
|
||||
|
||||
kg-extract-relationships:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.1.16
|
||||
command:
|
||||
- "kg-extract-relationships"
|
||||
- "-p"
|
||||
- "pulsar://pulsar:6650"
|
||||
restart: on-failure:100
|
||||
|
||||
vector-write:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.1.16
|
||||
command:
|
||||
- "vector-write-milvus"
|
||||
- "-p"
|
||||
- "pulsar://pulsar:6650"
|
||||
- "-t"
|
||||
- "http://milvus:19530"
|
||||
restart: on-failure:100
|
||||
|
||||
graph-write:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.1.16
|
||||
command:
|
||||
- "graph-write-cassandra"
|
||||
- "-p"
|
||||
- "pulsar://pulsar:6650"
|
||||
- "-g"
|
||||
- "cassandra"
|
||||
restart: on-failure:100
|
||||
|
||||
llm:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.1.16
|
||||
command:
|
||||
- "llm-claude-text"
|
||||
- "-p"
|
||||
- "pulsar://pulsar:6650"
|
||||
- "-k"
|
||||
- ${CLAUDE_KEY}
|
||||
restart: on-failure:100
|
||||
|
||||
178
docker-compose-ollama.yaml
Normal file
178
docker-compose-ollama.yaml
Normal file
|
|
@ -0,0 +1,178 @@
|
|||
|
||||
volumes:
|
||||
cassandra:
|
||||
pulsar-conf:
|
||||
pulsar-data:
|
||||
etcd:
|
||||
minio-data:
|
||||
milvus:
|
||||
|
||||
services:
|
||||
|
||||
cassandra:
|
||||
image: docker.io/cassandra:4.1.5
|
||||
ports:
|
||||
- "9042:9042"
|
||||
volumes:
|
||||
- "cassandra:/var/lib/cassandra"
|
||||
restart: on-failure:100
|
||||
|
||||
pulsar:
|
||||
image: docker.io/apachepulsar/pulsar:3.3.0
|
||||
command: bin/pulsar standalone
|
||||
ports:
|
||||
- "6650:6650"
|
||||
- "8080:8080"
|
||||
volumes:
|
||||
- "pulsar-conf:/pulsar/conf"
|
||||
- "pulsar-data:/pulsar/data"
|
||||
restart: on-failure:100
|
||||
|
||||
pulsar-manager:
|
||||
image: docker.io/apachepulsar/pulsar-manager:v0.3.0
|
||||
ports:
|
||||
- "9527:9527"
|
||||
- "7750:7750"
|
||||
environment:
|
||||
SPRING_CONFIGURATION_FILE: /pulsar-manager/pulsar-manager/application.properties
|
||||
restart: on-failure:100
|
||||
|
||||
etcd:
|
||||
image: quay.io/coreos/etcd:v3.5.5
|
||||
command:
|
||||
- "etcd"
|
||||
- "-advertise-client-urls=http://127.0.0.1:2379"
|
||||
- "-listen-client-urls"
|
||||
- "http://0.0.0.0:2379"
|
||||
- "--data-dir"
|
||||
- "/etcd"
|
||||
environment:
|
||||
ETCD_AUTO_COMPACTION_MODE: revision
|
||||
ETCD_AUTO_COMPACTION_RETENTION: "1000"
|
||||
ETCD_QUOTA_BACKEND_BYTES: "4294967296"
|
||||
ETCD_SNAPSHOT_COUNT: "50000"
|
||||
ports:
|
||||
- "2379:2379"
|
||||
volumes:
|
||||
- "etcd:/etcd"
|
||||
restart: on-failure:100
|
||||
|
||||
minio:
|
||||
image: docker.io/minio/minio:RELEASE.2024-07-04T14-25-45Z
|
||||
command:
|
||||
- "minio"
|
||||
- "server"
|
||||
- "/minio_data"
|
||||
- "--console-address"
|
||||
- ":9001"
|
||||
environment:
|
||||
MINIO_ROOT_USER: minioadmin
|
||||
MINIO_ROOT_PASSWORD: minioadmin
|
||||
ports:
|
||||
- "9001:9001"
|
||||
volumes:
|
||||
- "minio-data:/minio_data"
|
||||
restart: on-failure:100
|
||||
|
||||
milvus:
|
||||
image: docker.io/milvusdb/milvus:v2.4.5
|
||||
command:
|
||||
- "milvus"
|
||||
- "run"
|
||||
- "standalone"
|
||||
environment:
|
||||
ETCD_ENDPOINTS: etcd:2379
|
||||
MINIO_ADDRESS: minio:9000
|
||||
ports:
|
||||
- "9091:9091"
|
||||
- "19530:19530"
|
||||
volumes:
|
||||
- "milvus:/var/lib/milvus"
|
||||
restart: on-failure:100
|
||||
|
||||
pdf-decoder:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.1.16
|
||||
command:
|
||||
- "pdf-decoder"
|
||||
- "-p"
|
||||
- "pulsar://pulsar:6650"
|
||||
restart: on-failure:100
|
||||
|
||||
chunker:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.1.16
|
||||
command:
|
||||
- "chunker-recursive"
|
||||
- "-p"
|
||||
- "pulsar://pulsar:6650"
|
||||
restart: on-failure:100
|
||||
|
||||
vectorize:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.1.16
|
||||
command:
|
||||
- "embeddings-vectorize"
|
||||
- "-p"
|
||||
- "pulsar://pulsar:6650"
|
||||
restart: on-failure:100
|
||||
|
||||
embeddings:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.1.16
|
||||
command:
|
||||
- "embeddings-hf"
|
||||
- "-p"
|
||||
- "pulsar://pulsar:6650"
|
||||
restart: on-failure:100
|
||||
|
||||
kg-extract-definitions:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.1.16
|
||||
command:
|
||||
- "kg-extract-definitions"
|
||||
- "-p"
|
||||
- "pulsar://pulsar:6650"
|
||||
restart: on-failure:100
|
||||
|
||||
kg-extract-relationships:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.1.16
|
||||
command:
|
||||
- "kg-extract-relationships"
|
||||
- "-p"
|
||||
- "pulsar://pulsar:6650"
|
||||
restart: on-failure:100
|
||||
|
||||
vector-write:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.1.16
|
||||
command:
|
||||
- "vector-write-milvus"
|
||||
- "-p"
|
||||
- "pulsar://pulsar:6650"
|
||||
- "-t"
|
||||
- "http://milvus:19530"
|
||||
restart: on-failure:100
|
||||
|
||||
graph-write:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.1.16
|
||||
command:
|
||||
- "graph-write-cassandra"
|
||||
- "-p"
|
||||
- "pulsar://pulsar:6650"
|
||||
- "-g"
|
||||
- "cassandra"
|
||||
restart: on-failure:100
|
||||
|
||||
llm:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.1.16
|
||||
command:
|
||||
- "llm-ollama-text"
|
||||
- "-p"
|
||||
- "pulsar://pulsar:6650"
|
||||
- "-r"
|
||||
- "http://${OLLAMA_HOST}:11434/"
|
||||
restart: on-failure:100
|
||||
|
||||
graph-rag:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.1.16
|
||||
command:
|
||||
- "graph-rag"
|
||||
- "-p"
|
||||
- "pulsar://pulsar:6650"
|
||||
restart: on-failure:100
|
||||
|
||||
174
docker-compose-vertexai.yaml
Normal file
174
docker-compose-vertexai.yaml
Normal file
|
|
@ -0,0 +1,174 @@
|
|||
|
||||
volumes:
|
||||
cassandra:
|
||||
pulsar-conf:
|
||||
pulsar-data:
|
||||
etcd:
|
||||
minio-data:
|
||||
milvus:
|
||||
|
||||
services:
|
||||
|
||||
cassandra:
|
||||
image: docker.io/cassandra:4.1.5
|
||||
ports:
|
||||
- "9042:9042"
|
||||
volumes:
|
||||
- "cassandra:/var/lib/cassandra"
|
||||
restart: on-failure:100
|
||||
|
||||
pulsar:
|
||||
image: docker.io/apachepulsar/pulsar:3.3.0
|
||||
command: bin/pulsar standalone
|
||||
ports:
|
||||
- "6650:6650"
|
||||
- "8080:8080"
|
||||
volumes:
|
||||
- "pulsar-conf:/pulsar/conf"
|
||||
- "pulsar-data:/pulsar/data"
|
||||
restart: on-failure:100
|
||||
|
||||
pulsar-manager:
|
||||
image: docker.io/apachepulsar/pulsar-manager:v0.3.0
|
||||
ports:
|
||||
- "9527:9527"
|
||||
- "7750:7750"
|
||||
environment:
|
||||
SPRING_CONFIGURATION_FILE: /pulsar-manager/pulsar-manager/application.properties
|
||||
restart: on-failure:100
|
||||
|
||||
etcd:
|
||||
image: quay.io/coreos/etcd:v3.5.5
|
||||
command:
|
||||
- "etcd"
|
||||
- "-advertise-client-urls=http://127.0.0.1:2379"
|
||||
- "-listen-client-urls"
|
||||
- "http://0.0.0.0:2379"
|
||||
- "--data-dir"
|
||||
- "/etcd"
|
||||
environment:
|
||||
ETCD_AUTO_COMPACTION_MODE: revision
|
||||
ETCD_AUTO_COMPACTION_RETENTION: "1000"
|
||||
ETCD_QUOTA_BACKEND_BYTES: "4294967296"
|
||||
ETCD_SNAPSHOT_COUNT: "50000"
|
||||
ports:
|
||||
- "2379:2379"
|
||||
volumes:
|
||||
- "etcd:/etcd"
|
||||
restart: on-failure:100
|
||||
|
||||
minio:
|
||||
image: docker.io/minio/minio:RELEASE.2024-07-04T14-25-45Z
|
||||
command:
|
||||
- "minio"
|
||||
- "server"
|
||||
- "/minio_data"
|
||||
- "--console-address"
|
||||
- ":9001"
|
||||
environment:
|
||||
MINIO_ROOT_USER: minioadmin
|
||||
MINIO_ROOT_PASSWORD: minioadmin
|
||||
ports:
|
||||
- "9001:9001"
|
||||
volumes:
|
||||
- "minio-data:/minio_data"
|
||||
restart: on-failure:100
|
||||
|
||||
milvus:
|
||||
image: docker.io/milvusdb/milvus:v2.4.5
|
||||
command:
|
||||
- "milvus"
|
||||
- "run"
|
||||
- "standalone"
|
||||
environment:
|
||||
ETCD_ENDPOINTS: etcd:2379
|
||||
MINIO_ADDRESS: minio:9000
|
||||
ports:
|
||||
- "9091:9091"
|
||||
- "19530:19530"
|
||||
volumes:
|
||||
- "milvus:/var/lib/milvus"
|
||||
restart: on-failure:100
|
||||
|
||||
pdf-decoder:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.1.16
|
||||
command:
|
||||
- "pdf-decoder"
|
||||
- "-p"
|
||||
- "pulsar://pulsar:6650"
|
||||
restart: on-failure:100
|
||||
|
||||
chunker:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.1.16
|
||||
command:
|
||||
- "chunker-recursive"
|
||||
- "-p"
|
||||
- "pulsar://pulsar:6650"
|
||||
restart: on-failure:100
|
||||
|
||||
vectorize:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.1.16
|
||||
command:
|
||||
- "embeddings-vectorize"
|
||||
- "-p"
|
||||
- "pulsar://pulsar:6650"
|
||||
restart: on-failure:100
|
||||
|
||||
embeddings:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.1.16
|
||||
command:
|
||||
- "embeddings-hf"
|
||||
- "-p"
|
||||
- "pulsar://pulsar:6650"
|
||||
restart: on-failure:100
|
||||
|
||||
kg-extract-definitions:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.1.16
|
||||
command:
|
||||
- "kg-extract-definitions"
|
||||
- "-p"
|
||||
- "pulsar://pulsar:6650"
|
||||
restart: on-failure:100
|
||||
|
||||
kg-extract-relationships:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.1.16
|
||||
command:
|
||||
- "kg-extract-relationships"
|
||||
- "-p"
|
||||
- "pulsar://pulsar:6650"
|
||||
restart: on-failure:100
|
||||
|
||||
vector-write:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.1.16
|
||||
command:
|
||||
- "vector-write-milvus"
|
||||
- "-p"
|
||||
- "pulsar://pulsar:6650"
|
||||
- "-t"
|
||||
- "http://milvus:19530"
|
||||
restart: on-failure:100
|
||||
|
||||
graph-write:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.1.16
|
||||
command:
|
||||
- "graph-write-cassandra"
|
||||
- "-p"
|
||||
- "pulsar://pulsar:6650"
|
||||
- "-g"
|
||||
- "cassandra"
|
||||
restart: on-failure:100
|
||||
|
||||
llm:
|
||||
image: docker.io/trustgraph/trustgraph-flow:0.1.16
|
||||
command:
|
||||
- "llm-vertexai-text"
|
||||
- "-p"
|
||||
- "pulsar://pulsar:6650"
|
||||
- "-k"
|
||||
- "/vertexai/private.json"
|
||||
- "-r"
|
||||
- "us-west1"
|
||||
volumes:
|
||||
- "./vertexai:/vertexai"
|
||||
restart: on-failure:100
|
||||
|
||||
8
graph-clear
Executable file
8
graph-clear
Executable file
|
|
@ -0,0 +1,8 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from trustgraph import TrustGraph
|
||||
|
||||
t = TrustGraph()
|
||||
|
||||
t.clear()
|
||||
|
||||
70
graph-dump
Executable file
70
graph-dump
Executable file
|
|
@ -0,0 +1,70 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import pulsar
|
||||
from pulsar.schema import JsonSchema, Bytes
|
||||
from schema import Chunk, Triple
|
||||
from langchain_huggingface import HuggingFaceEmbeddings
|
||||
from langchain_community.llms import Ollama
|
||||
from trustgraphETL import scholar, callmixtral, build_graph_robust
|
||||
import sys
|
||||
import rdflib
|
||||
import uuid
|
||||
|
||||
g = rdflib.Graph()
|
||||
|
||||
client = pulsar.Client("pulsar://localhost:6650")
|
||||
|
||||
consumer = client.subscribe(
|
||||
'graph-load', 'graph-dump',
|
||||
schema=JsonSchema(Triple),
|
||||
)
|
||||
|
||||
g = rdflib.Graph()
|
||||
count = 0
|
||||
limit = 100
|
||||
|
||||
while True:
|
||||
|
||||
msg = consumer.receive()
|
||||
|
||||
try:
|
||||
|
||||
v = msg.value()
|
||||
|
||||
if v.o.is_uri:
|
||||
g.add((
|
||||
rdflib.term.URIRef(v.s.value),
|
||||
rdflib.term.URIRef(v.p.value),
|
||||
rdflib.term.URIRef(v.o.value),
|
||||
))
|
||||
else:
|
||||
g.add((
|
||||
rdflib.term.URIRef(v.s.value),
|
||||
rdflib.term.URIRef(v.p.value),
|
||||
rdflib.term.Literal(v.o.value),
|
||||
))
|
||||
|
||||
count += 1
|
||||
|
||||
if count > limit:
|
||||
|
||||
id = str(uuid.uuid4())
|
||||
path = f"graph/{id}.ttl"
|
||||
g.serialize(destination=path)
|
||||
g = rdflib.Graph()
|
||||
print(f"Written {path}")
|
||||
|
||||
count = 0
|
||||
|
||||
# Acknowledge successful processing of the message
|
||||
consumer.acknowledge(msg)
|
||||
|
||||
except Exception as e:
|
||||
|
||||
print(e)
|
||||
|
||||
# Message failed to be processed
|
||||
consumer.negative_acknowledge(msg)
|
||||
|
||||
client.close()
|
||||
|
||||
16
requirements.txt
Normal file
16
requirements.txt
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
torch
|
||||
urllib3
|
||||
transformers
|
||||
sentence-transformers
|
||||
rdflib
|
||||
pymilvus
|
||||
langchain
|
||||
langchain-core
|
||||
langchain-huggingface
|
||||
langchain-text-splitters
|
||||
langchain-community
|
||||
huggingface-hub
|
||||
cassandra-driver
|
||||
pulsar-client
|
||||
anthropic
|
||||
google-cloud-aiplatform
|
||||
6
scripts/chunker-recursive
Executable file
6
scripts/chunker-recursive
Executable file
|
|
@ -0,0 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from trustgraph.chunker.recursive import run
|
||||
|
||||
run()
|
||||
|
||||
6
scripts/embeddings-hf
Executable file
6
scripts/embeddings-hf
Executable file
|
|
@ -0,0 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from trustgraph.embeddings.hf import run
|
||||
|
||||
run()
|
||||
|
||||
6
scripts/embeddings-vectorize
Executable file
6
scripts/embeddings-vectorize
Executable file
|
|
@ -0,0 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from trustgraph.embeddings.vectorize import run
|
||||
|
||||
run()
|
||||
|
||||
6
scripts/graph-rag
Executable file
6
scripts/graph-rag
Executable file
|
|
@ -0,0 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from trustgraph.rag.graph import run
|
||||
|
||||
run()
|
||||
|
||||
10
scripts/graph-show
Executable file
10
scripts/graph-show
Executable file
|
|
@ -0,0 +1,10 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from trustgraph.trustgraph import TrustGraph
|
||||
|
||||
t = TrustGraph()
|
||||
|
||||
rows = t.get_all(limit=100_000_000)
|
||||
for s, p, o in rows:
|
||||
print(s, p, o)
|
||||
|
||||
37
scripts/graph-to-turtle
Executable file
37
scripts/graph-to-turtle
Executable file
|
|
@ -0,0 +1,37 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from trustgraph.trustgraph import TrustGraph
|
||||
import rdflib
|
||||
import sys
|
||||
import io
|
||||
|
||||
t = TrustGraph()
|
||||
|
||||
g = rdflib.Graph()
|
||||
|
||||
rows = t.get_all(limit=100_000_000)
|
||||
for s, p, o in rows:
|
||||
|
||||
# print(s, p, o)
|
||||
sv = rdflib.term.URIRef(s)
|
||||
pv = rdflib.term.URIRef(p)
|
||||
|
||||
if o.startswith("https://") or o.startswith("http://"):
|
||||
|
||||
# Skip malformed URLs with spaces in
|
||||
if " " in o:
|
||||
continue
|
||||
|
||||
ov = rdflib.term.URIRef(o)
|
||||
else:
|
||||
ov = rdflib.term.Literal(o)
|
||||
|
||||
g.add((sv, pv, ov))
|
||||
|
||||
g.serialize(destination="output.ttl", format="turtle")
|
||||
|
||||
buf = io.BytesIO()
|
||||
|
||||
g.serialize(destination=buf, format="turtle")
|
||||
|
||||
sys.stdout.write(buf.getvalue().decode("utf-8"))
|
||||
6
scripts/graph-write-cassandra
Executable file
6
scripts/graph-write-cassandra
Executable file
|
|
@ -0,0 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from trustgraph.graph.cassandra_write import run
|
||||
|
||||
run()
|
||||
|
||||
11
scripts/init-pulsar-manager
Executable file
11
scripts/init-pulsar-manager
Executable file
|
|
@ -0,0 +1,11 @@
|
|||
#!/usr/bin/env bash
|
||||
|
||||
CSRF_TOKEN=$(curl http://localhost:7750/pulsar-manager/csrf-token)
|
||||
|
||||
curl \
|
||||
-H "X-XSRF-TOKEN: $CSRF_TOKEN" \
|
||||
-H "Cookie: XSRF-TOKEN=$CSRF_TOKEN;" \
|
||||
-H 'Content-Type: application/json' \
|
||||
-X PUT \
|
||||
http://localhost:7750/pulsar-manager/users/superuser \
|
||||
-d '{"name": "admin", "password": "apachepulsar", "description": "test", "email": "username@test.org"}'
|
||||
6
scripts/kg-extract-definitions
Executable file
6
scripts/kg-extract-definitions
Executable file
|
|
@ -0,0 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from trustgraph.kg.extract_definitions import run
|
||||
|
||||
run()
|
||||
|
||||
6
scripts/kg-extract-relationships
Executable file
6
scripts/kg-extract-relationships
Executable file
|
|
@ -0,0 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from trustgraph.kg.extract_relationships import run
|
||||
|
||||
run()
|
||||
|
||||
6
scripts/llm-azure-text
Executable file
6
scripts/llm-azure-text
Executable file
|
|
@ -0,0 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from trustgraph.llm.azure_text import run
|
||||
|
||||
run()
|
||||
|
||||
6
scripts/llm-claude-text
Executable file
6
scripts/llm-claude-text
Executable file
|
|
@ -0,0 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from trustgraph.llm.claude_text import run
|
||||
|
||||
run()
|
||||
|
||||
6
scripts/llm-ollama-text
Executable file
6
scripts/llm-ollama-text
Executable file
|
|
@ -0,0 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from trustgraph.llm.ollama_text import run
|
||||
|
||||
run()
|
||||
|
||||
6
scripts/llm-vertexai-text
Executable file
6
scripts/llm-vertexai-text
Executable file
|
|
@ -0,0 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from trustgraph.llm.vertexai_text import run
|
||||
|
||||
run()
|
||||
|
||||
47
scripts/loader
Executable file
47
scripts/loader
Executable file
|
|
@ -0,0 +1,47 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import pulsar
|
||||
from pulsar.schema import JsonSchema, Bytes, String
|
||||
from trustgraph.schema import Document, Source
|
||||
import base64
|
||||
import hashlib
|
||||
|
||||
# client = pulsar.Client("pulsar://localhost:6650")
|
||||
host="10.89.1.246"
|
||||
host="localhost"
|
||||
client = pulsar.Client(f"pulsar://{host}:6650")
|
||||
|
||||
producer = client.create_producer(
|
||||
topic='document-load',
|
||||
schema=JsonSchema(Document),
|
||||
chunking_enabled=True,
|
||||
)
|
||||
|
||||
files=[
|
||||
"Challenger-Report-Vol1.pdf",
|
||||
# "columbia-accident-investigation-board-report-volume-1.pdf",
|
||||
# "Proposed_CIRCIA_Rules.pdf",
|
||||
]
|
||||
|
||||
for file in files:
|
||||
|
||||
path = "sources/" + file
|
||||
data = open(path, "rb").read()
|
||||
|
||||
id = hashlib.sha256(path.encode("utf-8")).hexdigest()[0:8]
|
||||
|
||||
r = Document(
|
||||
source=Source(
|
||||
source=path,
|
||||
title=path,
|
||||
id=id,
|
||||
),
|
||||
data=base64.b64encode(data),
|
||||
)
|
||||
|
||||
resp = producer.send(r)
|
||||
|
||||
print(resp)
|
||||
|
||||
client.close()
|
||||
|
||||
6
scripts/pdf-decoder
Executable file
6
scripts/pdf-decoder
Executable file
|
|
@ -0,0 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from trustgraph.decoder.pdf import run
|
||||
|
||||
run()
|
||||
|
||||
16
scripts/query
Executable file
16
scripts/query
Executable file
|
|
@ -0,0 +1,16 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from trustgraph.graph_rag import GraphRag
|
||||
import sys
|
||||
|
||||
query = " ".join(sys.argv[1:])
|
||||
|
||||
gr = GraphRag(verbose=True)
|
||||
|
||||
if query == "":
|
||||
query="""This knowledge graph describes the Space Shuttle disaster.
|
||||
Present 20 facts which are present in the knowledge graph."""
|
||||
|
||||
resp = gr.query(query)
|
||||
print(resp)
|
||||
|
||||
6
scripts/vector-write-milvus
Executable file
6
scripts/vector-write-milvus
Executable file
|
|
@ -0,0 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from trustgraph.vector.milvus_write import run
|
||||
|
||||
run()
|
||||
|
||||
65
setup.py
Normal file
65
setup.py
Normal file
|
|
@ -0,0 +1,65 @@
|
|||
import setuptools
|
||||
import os
|
||||
|
||||
with open("README.md", "r") as fh:
|
||||
long_description = fh.read()
|
||||
|
||||
version = "0.0.0"
|
||||
|
||||
setuptools.setup(
|
||||
name="trustgraph",
|
||||
version=version,
|
||||
author="trustgraph.ai",
|
||||
author_email="security@trustgraph.ai",
|
||||
description="trustgraph.ai",
|
||||
long_description=long_description,
|
||||
long_description_content_type="text/markdown",
|
||||
url="https://github.com/trustgraph.ai/FIXME.git",
|
||||
packages=setuptools.find_packages(),
|
||||
classifiers=[
|
||||
"Programming Language :: Python :: 3",
|
||||
"License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)",
|
||||
"Operating System :: OS Independent",
|
||||
],
|
||||
python_requires='>=3.8',
|
||||
download_url = "https://github.com/trustgraph.ai/FIXME.git/archive/refs/tags/v" + version + ".tar.gz",
|
||||
install_requires=[
|
||||
"torch",
|
||||
"urllib3",
|
||||
"transformers",
|
||||
"sentence-transformers",
|
||||
"rdflib",
|
||||
"pymilvus",
|
||||
"langchain",
|
||||
"langchain-core",
|
||||
"langchain-huggingface",
|
||||
"langchain-text-splitters",
|
||||
"langchain-community",
|
||||
"huggingface-hub",
|
||||
"requests",
|
||||
"cassandra-driver",
|
||||
"pulsar-client",
|
||||
"pypdf",
|
||||
"anthropic",
|
||||
"google-cloud-aiplatform",
|
||||
],
|
||||
scripts=[
|
||||
"scripts/chunker-recursive",
|
||||
"scripts/graph-show",
|
||||
"scripts/graph-to-turtle",
|
||||
"scripts/graph-write-cassandra",
|
||||
"scripts/kg-extract-definitions",
|
||||
"scripts/kg-extract-relationships",
|
||||
"scripts/llm-ollama-text",
|
||||
"scripts/llm-vertexai-text",
|
||||
"scripts/llm-claude-text",
|
||||
"scripts/llm-azure-text",
|
||||
"scripts/loader",
|
||||
"scripts/pdf-decoder",
|
||||
"scripts/query",
|
||||
"scripts/embeddings-vectorize",
|
||||
"scripts/embeddings-hf",
|
||||
"scripts/vector-write-milvus",
|
||||
"scripts/graph-rag",
|
||||
]
|
||||
)
|
||||
15
tests/test-embeddings
Executable file
15
tests/test-embeddings
Executable file
|
|
@ -0,0 +1,15 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import pulsar
|
||||
from trustgraph.embeddings_client import EmbeddingsClient
|
||||
|
||||
embed = EmbeddingsClient(pulsar_host="pulsar://localhost:6650")
|
||||
|
||||
prompt="Write a funny limerick about a llama"
|
||||
|
||||
resp = embed.request(prompt)
|
||||
|
||||
print(resp)
|
||||
|
||||
|
||||
|
||||
14
tests/test-graph-rag
Executable file
14
tests/test-graph-rag
Executable file
|
|
@ -0,0 +1,14 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import pulsar
|
||||
from trustgraph.graph_rag_client import GraphRagClient
|
||||
|
||||
rag = GraphRagClient(pulsar_host="pulsar://localhost:6650")
|
||||
|
||||
query="""This knowledge graph describes the Space Shuttle disaster.
|
||||
Present 20 facts which are present in the knowledge graph."""
|
||||
|
||||
resp = rag.request(query)
|
||||
|
||||
print(resp)
|
||||
|
||||
15
tests/test-llm
Executable file
15
tests/test-llm
Executable file
|
|
@ -0,0 +1,15 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import pulsar
|
||||
from trustgraph.llm_client import LlmClient
|
||||
|
||||
llm = LlmClient(pulsar_host="pulsar://localhost:6650")
|
||||
|
||||
prompt="Write a funny limerick about a llama"
|
||||
|
||||
resp = llm.request(prompt)
|
||||
|
||||
print(resp)
|
||||
|
||||
llm.close()
|
||||
|
||||
35
tests/test-milvus
Executable file
35
tests/test-milvus
Executable file
|
|
@ -0,0 +1,35 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from langchain_huggingface import HuggingFaceEmbeddings
|
||||
|
||||
from edge_map import VectorStore
|
||||
|
||||
client = VectorStore()
|
||||
|
||||
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
|
||||
|
||||
text="""A cat is a small animal. A dog is a large animal.
|
||||
Cats say miaow. Dogs go woof.
|
||||
"""
|
||||
|
||||
embeds = embeddings.embed_documents([text])[0]
|
||||
|
||||
text2="""If you couldn't download the model due to network issues, as a walkaround, you can use random vectors to represent the text and still finish the example. Just note that the search result won't reflect semantic similarity as the vectors are fake ones.
|
||||
"""
|
||||
|
||||
embeds2 = embeddings.embed_documents([text2])[0]
|
||||
|
||||
client.insert(embeds, "animals")
|
||||
client.insert(embeds, "vectors")
|
||||
|
||||
query="""What noise does a cat make?"""
|
||||
|
||||
qembeds = embeddings.embed_documents([query])[0]
|
||||
|
||||
res = client.search(
|
||||
qembeds,
|
||||
limit=2
|
||||
)
|
||||
|
||||
print(res)
|
||||
|
||||
0
trustgraph/__init__.py
Normal file
0
trustgraph/__init__.py
Normal file
0
trustgraph/chunker/__init__.py
Normal file
0
trustgraph/chunker/__init__.py
Normal file
3
trustgraph/chunker/recursive/__init__.py
Normal file
3
trustgraph/chunker/recursive/__init__.py
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
|
||||
from . chunker import *
|
||||
|
||||
7
trustgraph/chunker/recursive/__main__.py
Normal file
7
trustgraph/chunker/recursive/__main__.py
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from . chunker import run
|
||||
|
||||
if __name__ == '__main__':
|
||||
run()
|
||||
|
||||
164
trustgraph/chunker/recursive/chunker.py
Executable file
164
trustgraph/chunker/recursive/chunker.py
Executable file
|
|
@ -0,0 +1,164 @@
|
|||
|
||||
"""
|
||||
Simple decoder, accepts text documents on input, outputs chunks from the
|
||||
as text as separate output objects.
|
||||
"""
|
||||
|
||||
import pulsar
|
||||
from pulsar.schema import JsonSchema
|
||||
import tempfile
|
||||
import base64
|
||||
import os
|
||||
import argparse
|
||||
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
||||
import time
|
||||
|
||||
from ... schema import TextDocument, Chunk, Source
|
||||
from ... log_level import LogLevel
|
||||
|
||||
class Processor:
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
pulsar_host,
|
||||
input_queue,
|
||||
output_queue,
|
||||
subscriber,
|
||||
log_level,
|
||||
):
|
||||
|
||||
self.client = pulsar.Client(
|
||||
pulsar_host,
|
||||
logger=pulsar.ConsoleLogger(log_level.to_pulsar())
|
||||
)
|
||||
|
||||
self.consumer = self.client.subscribe(
|
||||
input_queue, subscriber,
|
||||
schema=JsonSchema(TextDocument),
|
||||
)
|
||||
|
||||
self.producer = self.client.create_producer(
|
||||
topic=output_queue,
|
||||
schema=JsonSchema(Chunk),
|
||||
)
|
||||
|
||||
self.text_splitter = RecursiveCharacterTextSplitter(
|
||||
chunk_size=1000,
|
||||
chunk_overlap=20,
|
||||
length_function=len,
|
||||
is_separator_regex=False,
|
||||
)
|
||||
|
||||
def run(self):
|
||||
|
||||
while True:
|
||||
|
||||
msg = self.consumer.receive()
|
||||
|
||||
try:
|
||||
|
||||
v = msg.value()
|
||||
print(f"Chunking {v.source.id}...", flush=True)
|
||||
|
||||
texts = self.text_splitter.create_documents(
|
||||
[v.text.decode("utf-8")]
|
||||
)
|
||||
|
||||
for ix, chunk in enumerate(texts):
|
||||
|
||||
id = v.source.id + "-c" + str(ix)
|
||||
|
||||
r = Chunk(
|
||||
source=Source(
|
||||
source=v.source.source,
|
||||
id=id,
|
||||
title=v.source.title
|
||||
),
|
||||
chunk=chunk.page_content.encode("utf-8"),
|
||||
)
|
||||
|
||||
self.producer.send(r)
|
||||
|
||||
# Acknowledge successful processing of the message
|
||||
self.consumer.acknowledge(msg)
|
||||
|
||||
print("Done.", flush=True)
|
||||
|
||||
except Exception as e:
|
||||
print(e, flush=True)
|
||||
|
||||
# Message failed to be processed
|
||||
self.consumer.negative_acknowledge(msg)
|
||||
|
||||
def __del__(self):
|
||||
self.client.close()
|
||||
|
||||
def run():
|
||||
|
||||
parser = argparse.ArgumentParser(
|
||||
prog='pdf-decoder',
|
||||
description=__doc__,
|
||||
)
|
||||
|
||||
default_pulsar_host = os.getenv("PULSAR_HOST", 'pulsar://pulsar:6650')
|
||||
default_input_queue = 'text-doc-load'
|
||||
default_output_queue = 'chunk-load'
|
||||
default_subscriber = 'chunker-recursive'
|
||||
|
||||
parser.add_argument(
|
||||
'-p', '--pulsar-host',
|
||||
default=default_pulsar_host,
|
||||
help=f'Pulsar host (default: {default_pulsar_host})',
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'-i', '--input-queue',
|
||||
default=default_input_queue,
|
||||
help=f'Input queue (default: {default_input_queue})'
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'-s', '--subscriber',
|
||||
default=default_subscriber,
|
||||
help=f'Queue subscriber name (default: {default_subscriber})'
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'-o', '--output-queue',
|
||||
default=default_output_queue,
|
||||
help=f'Output queue (default: {default_output_queue})'
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'-l', '--log-level',
|
||||
type=LogLevel,
|
||||
default=LogLevel.INFO,
|
||||
choices=list(LogLevel),
|
||||
help=f'Output queue (default: info)'
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
|
||||
while True:
|
||||
|
||||
try:
|
||||
|
||||
p = Processor(
|
||||
pulsar_host=args.pulsar_host,
|
||||
input_queue=args.input_queue,
|
||||
output_queue=args.output_queue,
|
||||
subscriber=args.subscriber,
|
||||
log_level=args.log_level,
|
||||
)
|
||||
|
||||
p.run()
|
||||
|
||||
except Exception as e:
|
||||
|
||||
print("Exception:", e, flush=True)
|
||||
print("Will retry...", flush=True)
|
||||
|
||||
time.sleep(10)
|
||||
|
||||
|
||||
0
trustgraph/decoder/__init__.py
Normal file
0
trustgraph/decoder/__init__.py
Normal file
3
trustgraph/decoder/pdf/__init__.py
Normal file
3
trustgraph/decoder/pdf/__init__.py
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
|
||||
from . pdf_decoder import *
|
||||
|
||||
7
trustgraph/decoder/pdf/__main__.py
Executable file
7
trustgraph/decoder/pdf/__main__.py
Executable file
|
|
@ -0,0 +1,7 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from . pdf_decoder import run
|
||||
|
||||
if __name__ == '__main__':
|
||||
run()
|
||||
|
||||
159
trustgraph/decoder/pdf/pdf_decoder.py
Executable file
159
trustgraph/decoder/pdf/pdf_decoder.py
Executable file
|
|
@ -0,0 +1,159 @@
|
|||
|
||||
"""
|
||||
Simple decoder, accepts PDF documents on input, outputs pages from the
|
||||
PDF document as text as separate output objects.
|
||||
"""
|
||||
|
||||
import pulsar
|
||||
from pulsar.schema import JsonSchema
|
||||
from langchain_community.document_loaders import PyPDFLoader
|
||||
import tempfile
|
||||
import base64
|
||||
import os
|
||||
import argparse
|
||||
import time
|
||||
|
||||
from ... schema import Document, TextDocument, Source
|
||||
from ... log_level import LogLevel
|
||||
|
||||
class Processor:
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
pulsar_host,
|
||||
input_queue,
|
||||
output_queue,
|
||||
subscriber,
|
||||
log_level,
|
||||
):
|
||||
|
||||
self.client = pulsar.Client(
|
||||
pulsar_host,
|
||||
logger=pulsar.ConsoleLogger(log_level.to_pulsar())
|
||||
)
|
||||
|
||||
self.consumer = self.client.subscribe(
|
||||
input_queue, subscriber,
|
||||
schema=JsonSchema(Document),
|
||||
)
|
||||
|
||||
self.producer = self.client.create_producer(
|
||||
topic=output_queue,
|
||||
schema=JsonSchema(TextDocument),
|
||||
)
|
||||
|
||||
def run(self):
|
||||
|
||||
while True:
|
||||
|
||||
msg = self.consumer.receive()
|
||||
|
||||
try:
|
||||
|
||||
v = msg.value()
|
||||
print(f"Decoding {v.source.id}...", flush=True)
|
||||
|
||||
with tempfile.NamedTemporaryFile(delete_on_close=False) as fp:
|
||||
|
||||
fp.write(base64.b64decode(v.data))
|
||||
fp.close()
|
||||
|
||||
with open(fp.name, mode='rb') as f:
|
||||
|
||||
loader = PyPDFLoader(fp.name)
|
||||
pages = loader.load()
|
||||
|
||||
for ix, page in enumerate(pages):
|
||||
|
||||
id = v.source.id + "-p" + str(ix)
|
||||
r = TextDocument(
|
||||
source=Source(
|
||||
source=v.source.source,
|
||||
title=v.source.title,
|
||||
id=id,
|
||||
),
|
||||
text=page.page_content.encode("utf-8"),
|
||||
)
|
||||
|
||||
self.producer.send(r)
|
||||
|
||||
# Acknowledge successful processing of the message
|
||||
self.consumer.acknowledge(msg)
|
||||
|
||||
print("Done.", flush=True)
|
||||
|
||||
except Exception as e:
|
||||
print(e, flush=True)
|
||||
|
||||
# Message failed to be processed
|
||||
self.consumer.negative_acknowledge(msg)
|
||||
|
||||
def __del__(self):
|
||||
self.client.close()
|
||||
|
||||
def run():
|
||||
|
||||
parser = argparse.ArgumentParser(
|
||||
prog='pdf-decoder',
|
||||
description=__doc__,
|
||||
)
|
||||
|
||||
default_pulsar_host = os.getenv("PULSAR_HOST", 'pulsar://pulsar:6650')
|
||||
default_input_queue = 'document-load'
|
||||
default_output_queue = 'text-doc-load'
|
||||
default_subscriber = 'pdf-decoder'
|
||||
|
||||
parser.add_argument(
|
||||
'-p', '--pulsar-host',
|
||||
default=default_pulsar_host,
|
||||
help=f'Pulsar host (default: {default_pulsar_host})',
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'-i', '--input-queue',
|
||||
default=default_input_queue,
|
||||
help=f'Input queue (default: {default_input_queue})'
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'-s', '--subscriber',
|
||||
default=default_subscriber,
|
||||
help=f'Queue subscriber name (default: {default_subscriber})'
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'-o', '--output-queue',
|
||||
default=default_output_queue,
|
||||
help=f'Output queue (default: {default_output_queue})'
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'-l', '--log-level',
|
||||
type=LogLevel,
|
||||
default=LogLevel.INFO,
|
||||
choices=list(LogLevel),
|
||||
help=f'Output queue (default: info)'
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
while True:
|
||||
|
||||
try:
|
||||
p = Processor(
|
||||
pulsar_host=args.pulsar_host,
|
||||
input_queue=args.input_queue,
|
||||
output_queue=args.output_queue,
|
||||
subscriber=args.subscriber,
|
||||
log_level=args.log_level,
|
||||
)
|
||||
|
||||
p.run()
|
||||
|
||||
except Exception as e:
|
||||
|
||||
print("Exception:", e, flush=True)
|
||||
print("Will retry...", flush=True)
|
||||
|
||||
time.sleep(10)
|
||||
|
||||
102
trustgraph/edge_map.py
Normal file
102
trustgraph/edge_map.py
Normal file
|
|
@ -0,0 +1,102 @@
|
|||
|
||||
from pymilvus import MilvusClient, CollectionSchema, FieldSchema, DataType
|
||||
|
||||
class VectorStore:
|
||||
|
||||
def __init__(self, uri="http://localhost:19530"):
|
||||
|
||||
self.client = MilvusClient(uri=uri)
|
||||
|
||||
self.collection = "edges"
|
||||
self.dimension = 384
|
||||
|
||||
if not self.client.has_collection(collection_name=self.collection):
|
||||
self.init_collection()
|
||||
|
||||
def init_collection(self):
|
||||
|
||||
pkey_field = FieldSchema(
|
||||
name="id",
|
||||
dtype=DataType.INT64,
|
||||
is_primary=True,
|
||||
auto_id=True,
|
||||
)
|
||||
|
||||
vec_field = FieldSchema(
|
||||
name="vector",
|
||||
dtype=DataType.FLOAT_VECTOR,
|
||||
dim=self.dimension,
|
||||
)
|
||||
|
||||
entity_field = FieldSchema(
|
||||
name="entity",
|
||||
dtype=DataType.VARCHAR,
|
||||
max_length=65535,
|
||||
)
|
||||
|
||||
schema = CollectionSchema(
|
||||
fields = [pkey_field, vec_field, entity_field],
|
||||
description = "Edge map schema",
|
||||
)
|
||||
|
||||
self.client.create_collection(
|
||||
collection_name=self.collection,
|
||||
schema=schema,
|
||||
metric_type="IP",
|
||||
)
|
||||
|
||||
index_params = MilvusClient.prepare_index_params()
|
||||
|
||||
index_params.add_index(
|
||||
field_name="vector",
|
||||
metric_type="COSINE",
|
||||
index_type="FLAT", # IVF_FLAT?!
|
||||
index_name="vector_index",
|
||||
params={ "nlist": 128 }
|
||||
)
|
||||
|
||||
self.client.create_index(
|
||||
collection_name=self.collection,
|
||||
index_params=index_params
|
||||
)
|
||||
|
||||
def insert(self, embeds, entity):
|
||||
|
||||
data = [
|
||||
{
|
||||
"vector": embeds,
|
||||
"entity": entity,
|
||||
}
|
||||
]
|
||||
|
||||
self.client.insert(collection_name=self.collection, data=data)
|
||||
|
||||
def search(self, embeds, fields=["entity"], limit=10):
|
||||
|
||||
search_params = {
|
||||
"metric_type": "COSINE",
|
||||
"params": {
|
||||
"radius": 0.1,
|
||||
"range_filter": 0.8
|
||||
}
|
||||
}
|
||||
|
||||
self.client.load_collection(
|
||||
collection_name=self.collection,
|
||||
# replica_number=1
|
||||
)
|
||||
|
||||
res = self.client.search(
|
||||
collection_name=self.collection,
|
||||
data=[embeds],
|
||||
limit=limit,
|
||||
output_fields=fields,
|
||||
search_params=search_params,
|
||||
)[0]
|
||||
|
||||
self.client.release_collection(
|
||||
collection_name=self.collection,
|
||||
)
|
||||
|
||||
return res
|
||||
|
||||
0
trustgraph/embeddings/__init__.py
Normal file
0
trustgraph/embeddings/__init__.py
Normal file
3
trustgraph/embeddings/hf/__init__.py
Normal file
3
trustgraph/embeddings/hf/__init__.py
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
|
||||
from . hf import *
|
||||
|
||||
7
trustgraph/embeddings/hf/__main__.py
Executable file
7
trustgraph/embeddings/hf/__main__.py
Executable file
|
|
@ -0,0 +1,7 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from . hf import run
|
||||
|
||||
if __name__ == '__main__':
|
||||
run()
|
||||
|
||||
161
trustgraph/embeddings/hf/hf.py
Executable file
161
trustgraph/embeddings/hf/hf.py
Executable file
|
|
@ -0,0 +1,161 @@
|
|||
|
||||
"""
|
||||
Simple LLM service, performs text prompt completion using an Ollama service.
|
||||
Input is prompt, output is response.
|
||||
"""
|
||||
|
||||
import pulsar
|
||||
from pulsar.schema import JsonSchema
|
||||
import tempfile
|
||||
import base64
|
||||
import os
|
||||
import argparse
|
||||
from langchain_huggingface import HuggingFaceEmbeddings
|
||||
import time
|
||||
|
||||
from ... schema import EmbeddingsRequest, EmbeddingsResponse
|
||||
from ... log_level import LogLevel
|
||||
|
||||
class Processor:
    """Pulsar worker which services embeddings requests.

    Consumes EmbeddingsRequest messages from the input queue, computes
    sentence embeddings with a HuggingFace model, and publishes
    EmbeddingsResponse messages tagged with the request's ``id``
    message property so callers can correlate responses.
    """

    def __init__(
            self, pulsar_host, input_queue, output_queue, subscriber,
            log_level, model,
    ):

        # One Pulsar client owns both the consumer and the producer.
        self.client = pulsar.Client(
            pulsar_host,
            logger=pulsar.ConsoleLogger(log_level.to_pulsar())
        )

        self.consumer = self.client.subscribe(
            input_queue, subscriber,
            schema=JsonSchema(EmbeddingsRequest),
        )

        self.producer = self.client.create_producer(
            topic=output_queue,
            schema=JsonSchema(EmbeddingsResponse),
        )

        # The model is loaded once, up front, not per message.
        self.embeddings = HuggingFaceEmbeddings(model_name=model)

    def run(self):
        """Consume requests forever; ack on success, nack on failure."""

        while True:

            msg = self.consumer.receive()

            try:

                request = msg.value()

                # Correlation id supplied by the sender.
                id = msg.properties()["id"]
                print(f"Handling input {id}...", flush=True)

                vectors = self.embeddings.embed_documents([request.text])

                print("Send response...", flush=True)
                response = EmbeddingsResponse(vectors=vectors)
                self.producer.send(response, properties={"id": id})

                print("Done.", flush=True)

                # Acknowledge successful processing of the message
                self.consumer.acknowledge(msg)

            except Exception as e:

                print("Exception:", e, flush=True)

                # Message failed to be processed; request redelivery
                self.consumer.negative_acknowledge(msg)

    def __del__(self):
        print("Closing", flush=True)
        self.client.close()
|
||||
|
||||
def run():
    """Parse command-line options and run the embeddings service.

    Runs forever; on any failure the processor is torn down and
    recreated after a short delay.
    """

    parser = argparse.ArgumentParser(
        # Fixed: was 'llm-ollama-text', a copy-paste from the Ollama
        # text-completion service; this is the HF embeddings service.
        prog='embeddings-hf',
        description=__doc__,
    )

    default_pulsar_host = os.getenv("PULSAR_HOST", 'pulsar://pulsar:6650')
    default_input_queue = 'embeddings'
    default_output_queue = 'embeddings-response'
    default_subscriber = 'embeddings-hf'
    default_model = "all-MiniLM-L6-v2"

    parser.add_argument(
        '-p', '--pulsar-host',
        default=default_pulsar_host,
        help=f'Pulsar host (default: {default_pulsar_host})',
    )

    parser.add_argument(
        '-i', '--input-queue',
        default=default_input_queue,
        help=f'Input queue (default: {default_input_queue})'
    )

    parser.add_argument(
        '-s', '--subscriber',
        default=default_subscriber,
        help=f'Queue subscriber name (default: {default_subscriber})'
    )

    parser.add_argument(
        '-o', '--output-queue',
        default=default_output_queue,
        help=f'Output queue (default: {default_output_queue})'
    )

    parser.add_argument(
        '-l', '--log-level',
        type=LogLevel,
        default=LogLevel.INFO,
        choices=list(LogLevel),
        # Fixed: help text previously said 'Output queue'
        help=f'Log level (default: info)'
    )

    parser.add_argument(
        '-m', '--model',
        default=default_model,
        # Fixed: help text previously said 'LLM model'; this selects
        # the embeddings model.
        help=f'Embeddings model (default: {default_model})'
    )

    args = parser.parse_args()

    while True:

        try:

            p = Processor(
                pulsar_host=args.pulsar_host,
                input_queue=args.input_queue,
                output_queue=args.output_queue,
                subscriber=args.subscriber,
                log_level=args.log_level,
                model=args.model,
            )

            p.run()

        except Exception as e:

            print("Exception:", e, flush=True)
            print("Will retry...", flush=True)

            time.sleep(10)
|
||||
|
||||
3
trustgraph/embeddings/vectorize/__init__.py
Normal file
3
trustgraph/embeddings/vectorize/__init__.py
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
|
||||
from . vectorize import *
|
||||
|
||||
6
trustgraph/embeddings/vectorize/__main__.py
Executable file
6
trustgraph/embeddings/vectorize/__main__.py
Executable file
|
|
@ -0,0 +1,6 @@
|
|||
|
||||
from . vectorize import run
|
||||
|
||||
if __name__ == '__main__':
|
||||
run()
|
||||
|
||||
167
trustgraph/embeddings/vectorize/vectorize.py
Executable file
167
trustgraph/embeddings/vectorize/vectorize.py
Executable file
|
|
@ -0,0 +1,167 @@
|
|||
|
||||
"""
|
||||
Vectorizer, applies an embedding algorithm to a chunk. Input is a chunk,
|
||||
output is chunk and vectors.
|
||||
"""
|
||||
|
||||
import pulsar
|
||||
from pulsar.schema import JsonSchema
|
||||
import tempfile
|
||||
import base64
|
||||
import os
|
||||
import argparse
|
||||
import time
|
||||
|
||||
from ... schema import Chunk, VectorsChunk
|
||||
from ... embeddings_client import EmbeddingsClient
|
||||
from ... log_level import LogLevel
|
||||
|
||||
class Processor:
    """Pulsar worker which vectorizes chunks.

    Consumes Chunk messages, obtains an embedding for the chunk text
    via the embeddings service, and publishes VectorsChunk messages
    pairing the chunk with its vectors.
    """

    def __init__(
            self, pulsar_host, input_queue, output_queue, subscriber,
            log_level, model,
    ):
        # NOTE(review): the model parameter is accepted but not used
        # here; embedding is delegated to the embeddings service.

        self.client = pulsar.Client(
            pulsar_host,
            logger=pulsar.ConsoleLogger(log_level.to_pulsar())
        )

        self.consumer = self.client.subscribe(
            input_queue, subscriber,
            schema=JsonSchema(Chunk),
        )

        self.producer = self.client.create_producer(
            topic=output_queue,
            schema=JsonSchema(VectorsChunk),
        )

        # Embeddings are computed by a separate service over Pulsar.
        self.embeddings = EmbeddingsClient(pulsar_host=pulsar_host)

    def emit(self, source, chunk, vectors):
        """Publish one chunk + vectors pair."""
        out = VectorsChunk(source=source, chunk=chunk, vectors=vectors)
        self.producer.send(out)

    def run(self):
        """Consume chunks forever; ack on success, nack on failure."""

        while True:

            msg = self.consumer.receive()

            try:

                value = msg.value()
                print(f"Indexing {value.source.id}...", flush=True)

                text = value.chunk.decode("utf-8")

                # Embedding failures are logged but do not nack the
                # message; the chunk is skipped.
                try:

                    vecs = self.embeddings.request(text)

                    self.emit(
                        source=value.source,
                        chunk=text.encode("utf-8"),
                        vectors=vecs
                    )

                except Exception as e:
                    print("Exception:", e, flush=True)

                print("Done.", flush=True)

                # Acknowledge successful processing of the message
                self.consumer.acknowledge(msg)

            except Exception as e:

                print("Exception:", e, flush=True)

                # Message failed to be processed; request redelivery
                self.consumer.negative_acknowledge(msg)

    def __del__(self):
        self.client.close()
|
||||
|
||||
def run():
    """Parse command-line options and run the vectorizer service.

    Runs forever; on any failure the processor is torn down and
    recreated after a short delay.
    """

    parser = argparse.ArgumentParser(
        prog='embeddings-vectorizer',
        description=__doc__,
    )

    default_pulsar_host = os.getenv("PULSAR_HOST", 'pulsar://pulsar:6650')
    default_input_queue = 'chunk-load'
    default_output_queue = 'vectors-chunk-load'
    default_subscriber = 'embeddings-vectorizer'
    default_model = "all-MiniLM-L6-v2"

    parser.add_argument(
        '-p', '--pulsar-host',
        default=default_pulsar_host,
        help=f'Pulsar host (default: {default_pulsar_host})',
    )

    parser.add_argument(
        '-i', '--input-queue',
        default=default_input_queue,
        help=f'Input queue (default: {default_input_queue})'
    )

    parser.add_argument(
        '-s', '--subscriber',
        default=default_subscriber,
        help=f'Queue subscriber name (default: {default_subscriber})'
    )

    parser.add_argument(
        '-o', '--output-queue',
        default=default_output_queue,
        help=f'Output queue (default: {default_output_queue})'
    )

    parser.add_argument(
        '-l', '--log-level',
        type=LogLevel,
        default=LogLevel.INFO,
        choices=list(LogLevel),
        # Fixed: help text previously said 'Output queue'
        help=f'Log level (default: info)'
    )

    parser.add_argument(
        '-m', '--model',
        default=default_model,
        # Fixed: help text previously said 'LLM model'; this selects
        # the embeddings model.
        help=f'Embeddings model (default: {default_model})'
    )

    args = parser.parse_args()

    while True:

        try:

            p = Processor(
                pulsar_host=args.pulsar_host,
                input_queue=args.input_queue,
                output_queue=args.output_queue,
                subscriber=args.subscriber,
                log_level=args.log_level,
                model=args.model,
            )

            p.run()

        except Exception as e:

            print("Exception:", e, flush=True)
            print("Will retry...", flush=True)

            time.sleep(10)
|
||||
|
||||
70
trustgraph/embeddings_client.py
Normal file
70
trustgraph/embeddings_client.py
Normal file
|
|
@ -0,0 +1,70 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import pulsar
|
||||
import _pulsar
|
||||
from pulsar.schema import JsonSchema
|
||||
from trustgraph.schema import EmbeddingsRequest, EmbeddingsResponse
|
||||
import hashlib
|
||||
import uuid
|
||||
|
||||
# Ugly
|
||||
ERROR=_pulsar.LoggerLevel.Error
|
||||
WARN=_pulsar.LoggerLevel.Warn
|
||||
INFO=_pulsar.LoggerLevel.Info
|
||||
DEBUG=_pulsar.LoggerLevel.Debug
|
||||
|
||||
class EmbeddingsClient:
    """Request/response client for the embeddings service over Pulsar.

    Sends EmbeddingsRequest messages to the 'embeddings' topic and
    waits on the 'embeddings-response' topic for a reply carrying the
    same correlation ``id`` message property.
    """

    def __init__(
        self, log_level=ERROR, client_id=None,
        pulsar_host="pulsar://pulsar:6650",
    ):

        # Each client instance gets its own subscription so it receives
        # its own copy of every response.
        if client_id is None:  # fixed: was '== None'
            client_id = str(uuid.uuid4())

        self.client = pulsar.Client(
            pulsar_host,
            logger=pulsar.ConsoleLogger(log_level),
        )

        self.producer = self.client.create_producer(
            topic='embeddings',
            schema=JsonSchema(EmbeddingsRequest),
            chunking_enabled=True,
        )

        self.consumer = self.client.subscribe(
            'embeddings-response', client_id,
            schema=JsonSchema(EmbeddingsResponse),
        )

    def request(self, text, timeout=500):
        """Request embeddings for ``text`` and return the vectors.

        timeout is in seconds and applies to each individual receive()
        call while waiting for the matching response.  Raises the
        Pulsar timeout error if no message arrives in time.
        """

        id = str(uuid.uuid4())

        r = EmbeddingsRequest(
            text=text
        )
        self.producer.send(r, properties={ "id": id })

        while True:

            msg = self.consumer.receive(timeout_millis=timeout * 1000)

            mid = msg.properties()["id"]

            if mid == id:
                resp = msg.value().vectors
                self.consumer.acknowledge(msg)
                return resp

            # Ignore (but acknowledge) messages with the wrong ID --
            # they belong to requests from other callers.
            self.consumer.acknowledge(msg)

    def __del__(self):

        self.producer.close()
        self.consumer.close()
        self.client.close()
|
||||
|
||||
0
trustgraph/graph/__init__.py
Normal file
0
trustgraph/graph/__init__.py
Normal file
3
trustgraph/graph/cassandra_write/__init__.py
Normal file
3
trustgraph/graph/cassandra_write/__init__.py
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
|
||||
from . write import *
|
||||
|
||||
7
trustgraph/graph/cassandra_write/__main__.py
Executable file
7
trustgraph/graph/cassandra_write/__main__.py
Executable file
|
|
@ -0,0 +1,7 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from . write import run
|
||||
|
||||
if __name__ == '__main__':
|
||||
run()
|
||||
|
||||
144
trustgraph/graph/cassandra_write/write.py
Executable file
144
trustgraph/graph/cassandra_write/write.py
Executable file
|
|
@ -0,0 +1,144 @@
|
|||
|
||||
"""
|
||||
Simple decoder, accepts PDF documents on input, outputs pages from the
|
||||
PDF document as text as separate output objects.
|
||||
"""
|
||||
|
||||
import pulsar
|
||||
from pulsar.schema import JsonSchema
|
||||
import tempfile
|
||||
import base64
|
||||
import os
|
||||
import argparse
|
||||
import time
|
||||
|
||||
from ... trustgraph import TrustGraph
|
||||
from ... schema import Triple
|
||||
from ... log_level import LogLevel
|
||||
|
||||
class Processor:
    """Pulsar worker which writes triples to Cassandra.

    Consumes Triple messages and inserts each (s, p, o) into the
    TrustGraph Cassandra store, logging a progress line every 1000
    triples written.
    """

    def __init__(
            self, pulsar_host, input_queue, subscriber, log_level,
            graph_host,
    ):

        self.client = pulsar.Client(
            pulsar_host,
            logger=pulsar.ConsoleLogger(log_level.to_pulsar())
        )

        self.consumer = self.client.subscribe(
            input_queue, subscriber,
            schema=JsonSchema(Triple),
        )

        # Cassandra-backed triple store.
        self.tg = TrustGraph([graph_host])

        # Number of triples written, used for progress reporting.
        self.count = 0

    def run(self):
        """Consume triples forever; ack on success, nack on failure."""

        while True:

            msg = self.consumer.receive()

            try:

                triple = msg.value()

                self.tg.insert(
                    triple.s.value,
                    triple.p.value,
                    triple.o.value
                )

                self.count += 1

                # Progress line every 1000 inserts.
                if (self.count % 1000) == 0:
                    print(self.count, "...", flush=True)

                # Acknowledge successful processing of the message
                self.consumer.acknowledge(msg)

            except Exception as e:

                print("Exception:", e, flush=True)

                # Message failed to be processed; request redelivery
                self.consumer.negative_acknowledge(msg)

    def __del__(self):
        self.client.close()
|
||||
|
||||
def run():
    """Parse command-line options and run the Cassandra graph writer.

    Runs forever; on any failure the processor is torn down and
    recreated after a short delay.
    """

    parser = argparse.ArgumentParser(
        prog='graph-write-cassandra',
        description=__doc__,
    )

    default_pulsar_host = os.getenv("PULSAR_HOST", 'pulsar://pulsar:6650')
    default_input_queue = 'graph-load'
    default_subscriber = 'graph-write-cassandra'

    parser.add_argument(
        '-p', '--pulsar-host',
        default=default_pulsar_host,
        help=f'Pulsar host (default: {default_pulsar_host})',
    )

    parser.add_argument(
        '-i', '--input-queue',
        default=default_input_queue,
        help=f'Input queue (default: {default_input_queue})'
    )

    parser.add_argument(
        '-s', '--subscriber',
        default=default_subscriber,
        help=f'Queue subscriber name (default: {default_subscriber})'
    )

    parser.add_argument(
        '-l', '--log-level',
        type=LogLevel,
        default=LogLevel.INFO,
        choices=list(LogLevel),
        # Fixed: help text previously said 'Output queue'
        help=f'Log level (default: info)'
    )

    parser.add_argument(
        '-g', '--graph-host',
        default="localhost",
        # Fixed: help text previously said 'Output queue'
        help=f'Graph host (default: localhost)'
    )

    args = parser.parse_args()

    while True:

        try:

            p = Processor(
                pulsar_host=args.pulsar_host,
                input_queue=args.input_queue,
                subscriber=args.subscriber,
                log_level=args.log_level,
                graph_host=args.graph_host,
            )

            p.run()

        except Exception as e:

            print("Exception:", e, flush=True)
            print("Will retry...", flush=True)

            time.sleep(10)
|
||||
|
||||
|
||||
227
trustgraph/graph_rag.py
Normal file
227
trustgraph/graph_rag.py
Normal file
|
|
@ -0,0 +1,227 @@
|
|||
|
||||
from trustgraph.trustgraph import TrustGraph
|
||||
from trustgraph.edge_map import VectorStore
|
||||
from trustgraph.trustgraph import TrustGraph
|
||||
from trustgraph.llm_client import LlmClient
|
||||
from trustgraph.embeddings_client import EmbeddingsClient
|
||||
|
||||
LABEL="http://www.w3.org/2000/01/rdf-schema#label"
|
||||
DEFINITION="http://www.w3.org/2004/02/skos/core#definition"
|
||||
|
||||
class GraphRag:
    """Graph-RAG query engine.

    Turns a natural-language query into embeddings, looks up related
    entities in a vector store, pulls a subgraph around those entities
    from the triple store, renders the subgraph into a prompt, and asks
    the LLM to answer the query from that knowledge graph.
    """

    def __init__(
        self,
        graph_hosts=None,
        pulsar_host="pulsar://pulsar:6650",
        vector_store="http://milvus:19530",
        verbose=False
    ):

        self.verbose = verbose

        # Avoid a mutable default argument; fixed '== None' idiom.
        if graph_hosts is None:
            graph_hosts = ["cassandra"]

        if self.verbose:
            print("Initialising...", flush=True)

        self.graph = TrustGraph(graph_hosts)

        self.embeddings = EmbeddingsClient(pulsar_host=pulsar_host)

        self.vecstore = VectorStore(vector_store)

        # Tunables: max entities per vector search, max triples fetched
        # per entity/direction, and overall subgraph size cap.
        self.entity_limit = 50
        self.query_limit = 30
        self.max_sg_size = 3000

        # Cache of entity URI -> label, to avoid repeated graph lookups.
        self.label_cache = {}

        self.llm = LlmClient(pulsar_host=pulsar_host)

        if self.verbose:
            print("Initialised", flush=True)

    def get_vector(self, query):
        """Return embeddings vectors for the query text."""

        if self.verbose:
            print("Compute embeddings...", flush=True)

        qembeds = self.embeddings.request(query)

        if self.verbose:
            print("Done.", flush=True)

        return qembeds

    def get_entities(self, query):
        """Return entities related to the query, via vector search."""

        everything = []

        vectors = self.get_vector(query)

        if self.verbose:
            print("Get entities...", flush=True)

        for vector in vectors:

            res = self.vecstore.search(
                vector,
                limit=self.entity_limit
            )

            # De-duplicate per search; may still repeat across vectors.
            entities = set([
                item["entity"]["entity"]
                for item in res
            ])

            everything.extend(entities)

        if self.verbose:
            print("Entities:", flush=True)
            for ent in everything:
                print(" ", ent, flush=True)

        return everything

    def maybe_label(self, e):
        """Return the label of e if it has one, otherwise e itself."""

        if e in self.label_cache:
            return self.label_cache[e]

        res = self.graph.get_sp(e, LABEL)
        res = list(res)

        if len(res) == 0:
            # No label; cache the identity mapping.
            self.label_cache[e] = e
            return e

        # Use the first label found.
        self.label_cache[e] = res[0][0]
        return self.label_cache[e]

    def get_nodes(self, query):
        """Return labels for the entities related to the query."""

        ents = self.get_entities(query)

        if self.verbose:
            print("Get labels...", flush=True)

        nodes = [
            self.maybe_label(e)
            for e in ents
        ]

        if self.verbose:
            print("Nodes:", flush=True)
            for node in nodes:
                print(" ", node, flush=True)

        return nodes

    def get_subgraph(self, query):
        """Return triples touching any entity related to the query.

        Each entity is expanded in all three positions (subject,
        predicate, object); the result is capped at max_sg_size.
        """

        entities = self.get_entities(query)

        subgraph = set()

        if self.verbose:
            print("Get subgraph...", flush=True)

        for e in entities:

            res = self.graph.get_s(e, limit=self.query_limit)
            for p, o in res:
                subgraph.add((e, p, o))

            res = self.graph.get_p(e, limit=self.query_limit)
            for s, o in res:
                subgraph.add((s, e, o))

            res = self.graph.get_o(e, limit=self.query_limit)
            for s, p in res:
                subgraph.add((s, p, e))

        subgraph = list(subgraph)

        subgraph = subgraph[0:self.max_sg_size]

        if self.verbose:
            print("Subgraph:", flush=True)
            for edge in subgraph:
                print(" ", str(edge), flush=True)

        if self.verbose:
            print("Done.", flush=True)

        return subgraph

    def get_labelgraph(self, query):
        """Return the subgraph with URIs replaced by labels.

        Label edges themselves are dropped, since the labels are
        substituted into the remaining triples.
        """

        subgraph = self.get_subgraph(query)

        sg2 = []

        for edge in subgraph:

            if edge[1] == LABEL:
                continue

            s = self.maybe_label(edge[0])
            p = self.maybe_label(edge[1])
            o = self.maybe_label(edge[2])

            sg2.append((s, p, o))

        return sg2

    def get_cypher(self, query):
        """Render the label graph in a Cypher-like text form."""

        sg = self.get_labelgraph(query)

        sg2 = []

        for s, p, o in sg:

            sg2.append(f"({s})-[{p}]->({o})")

        kg = "\n".join(sg2)
        # Flatten backslashes, which can break prompt formatting.
        kg = kg.replace("\\", "-")

        return kg

    def get_graph_prompt(self, query):
        """Build the LLM prompt: instructions + knowledge graph + query."""

        kg = self.get_cypher(query)

        prompt=f"""<instructions>Study the knowledge graph provided, and use
the information to answer the question. The question should be answered
in plain English only.
</instructions>
<knowledge-graph>
{kg}
</knowledge-graph>
<question>
{query}
</question>
"""

        return prompt

    def query(self, query):
        """Answer a natural-language query using the knowledge graph."""

        if self.verbose:
            print("Construct prompt...", flush=True)

        prompt = self.get_graph_prompt(query)

        if self.verbose:
            print("Invoke LLM...", flush=True)

        resp = self.llm.request(prompt)

        if self.verbose:
            print("Done", flush=True)

        return resp
|
||||
|
||||
68
trustgraph/graph_rag_client.py
Normal file
68
trustgraph/graph_rag_client.py
Normal file
|
|
@ -0,0 +1,68 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import pulsar
|
||||
import _pulsar
|
||||
from pulsar.schema import JsonSchema
|
||||
from trustgraph.schema import GraphRagQuery, GraphRagResponse
|
||||
import hashlib
|
||||
import uuid
|
||||
|
||||
# Ugly
|
||||
ERROR=_pulsar.LoggerLevel.Error
|
||||
WARN=_pulsar.LoggerLevel.Warn
|
||||
INFO=_pulsar.LoggerLevel.Info
|
||||
DEBUG=_pulsar.LoggerLevel.Debug
|
||||
|
||||
class GraphRagClient:
    """Request/response client for the Graph-RAG service over Pulsar.

    Sends GraphRagQuery messages to the 'graph-rag-query' topic and
    waits on 'graph-rag-response' for a reply carrying the same
    correlation ``id`` message property.
    """

    def __init__(
        self, log_level=ERROR, client_id=None,
        pulsar_host="pulsar://pulsar:6650",
    ):

        # Each client instance gets its own subscription so it receives
        # its own copy of every response.
        if client_id is None:  # fixed: was '== None'
            client_id = str(uuid.uuid4())

        self.client = pulsar.Client(
            pulsar_host,
            logger=pulsar.ConsoleLogger(log_level),
        )

        self.producer = self.client.create_producer(
            topic='graph-rag-query',
            schema=JsonSchema(GraphRagQuery),
            chunking_enabled=True,
        )

        self.consumer = self.client.subscribe(
            'graph-rag-response', client_id,
            schema=JsonSchema(GraphRagResponse),
        )

    def request(self, query, timeout=500):
        """Send a query and return the service's response text.

        timeout is in seconds and applies to each individual receive()
        call while waiting for the matching response.
        """

        id = str(uuid.uuid4())

        r = GraphRagQuery(
            query=query
        )
        self.producer.send(r, properties={ "id": id })

        while True:

            msg = self.consumer.receive(timeout_millis=timeout * 1000)

            mid = msg.properties()["id"]

            if mid == id:
                resp = msg.value().response
                self.consumer.acknowledge(msg)
                return resp

            # Ignore (but acknowledge) messages with the wrong ID --
            # they belong to requests from other callers.
            self.consumer.acknowledge(msg)

    def __del__(self):

        # Closing the client also releases its producer and consumer.
        self.client.close()
|
||||
|
||||
0
trustgraph/kg/__init__.py
Normal file
0
trustgraph/kg/__init__.py
Normal file
3
trustgraph/kg/extract_definitions/__init__.py
Normal file
3
trustgraph/kg/extract_definitions/__init__.py
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
|
||||
from . extract import *
|
||||
|
||||
7
trustgraph/kg/extract_definitions/__main__.py
Executable file
7
trustgraph/kg/extract_definitions/__main__.py
Executable file
|
|
@ -0,0 +1,7 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from . extract import run
|
||||
|
||||
if __name__ == '__main__':
|
||||
run()
|
||||
|
||||
193
trustgraph/kg/extract_definitions/extract.py
Executable file
193
trustgraph/kg/extract_definitions/extract.py
Executable file
|
|
@ -0,0 +1,193 @@
|
|||
|
||||
"""
|
||||
Simple decoder, accepts PDF documents on input, outputs pages from the
|
||||
PDF document as text as separate output objects.
|
||||
"""
|
||||
|
||||
import pulsar
|
||||
from pulsar.schema import JsonSchema
|
||||
from langchain_community.document_loaders import PyPDFLoader
|
||||
import tempfile
|
||||
import base64
|
||||
import os
|
||||
import argparse
|
||||
import rdflib
|
||||
import json
|
||||
import urllib.parse
|
||||
import time
|
||||
|
||||
from ... schema import VectorsChunk, Triple, Source, Value
|
||||
from ... log_level import LogLevel
|
||||
from ... llm_client import LlmClient
|
||||
from ... prompts import to_definitions
|
||||
from ... rdf import TRUSTGRAPH_ENTITIES, DEFINITION
|
||||
|
||||
DEFINITION_VALUE = Value(value=DEFINITION, is_uri=True)
|
||||
|
||||
class Processor:
    """Pulsar worker which extracts entity definitions.

    Consumes VectorsChunk messages, asks the LLM for entity
    definitions found in the chunk text, and publishes each as a
    (entity, skos:definition, text) Triple.
    """

    def __init__(
            self,
            pulsar_host,
            input_queue,
            output_queue,
            subscriber,
            log_level,
    ):

        self.client = pulsar.Client(
            pulsar_host,
            logger=pulsar.ConsoleLogger(log_level.to_pulsar())
        )

        self.consumer = self.client.subscribe(
            input_queue, subscriber,
            schema=JsonSchema(VectorsChunk),
        )

        self.producer = self.client.create_producer(
            topic=output_queue,
            schema=JsonSchema(Triple),
        )

        self.llm = LlmClient(pulsar_host=pulsar_host)

    def to_uri(self, text):
        """Map an entity name to a URI in the TrustGraph namespace."""

        part = text.replace(" ", "-").lower().encode("utf-8")
        quoted = urllib.parse.quote(part)
        uri = TRUSTGRAPH_ENTITIES + quoted

        return uri

    def get_definitions(self, chunk):
        """Ask the LLM for definitions in the chunk; returns parsed JSON."""

        prompt = to_definitions(chunk)
        resp = self.llm.request(prompt)

        defs = json.loads(resp)

        return defs

    def emit_edge(self, s, p, o):
        """Publish one triple."""

        t = Triple(s=s, p=p, o=o)
        self.producer.send(t)

    def run(self):
        """Consume chunks forever; ack on success, nack on failure."""

        while True:

            msg = self.consumer.receive()

            try:

                v = msg.value()
                print(f"Indexing {v.source.id}...", flush=True)

                chunk = v.chunk.decode("utf-8")

                # Removed an unused 'rdflib.Graph()' local that was
                # never read or written.

                # Extraction failures are logged but do not nack the
                # message; the chunk is skipped.
                try:

                    defs = self.get_definitions(chunk)
                    print(json.dumps(defs, indent=4), flush=True)

                    for defn in defs:

                        s = defn["entity"]
                        s_uri = self.to_uri(s)

                        o = defn["definition"]

                        s_value = Value(value=str(s_uri), is_uri=True)
                        o_value = Value(value=str(o), is_uri=False)

                        self.emit_edge(s_value, DEFINITION_VALUE, o_value)

                except Exception as e:
                    print("Exception: ", e, flush=True)

                print("Done.", flush=True)

                # Acknowledge successful processing of the message
                self.consumer.acknowledge(msg)

            except Exception as e:

                print("Exception: ", e, flush=True)

                # Message failed to be processed
                self.consumer.negative_acknowledge(msg)

    def __del__(self):
        self.client.close()
|
||||
|
||||
def run():
    """Parse command-line options and run the definition extractor.

    Runs forever; on any failure the processor is torn down and
    recreated after a short delay.
    """

    parser = argparse.ArgumentParser(
        # Fixed: was 'pdf-decoder', a copy-paste from the PDF decoder
        # service.
        prog='kg-extract-definitions',
        description=__doc__,
    )

    default_pulsar_host = os.getenv("PULSAR_HOST", 'pulsar://pulsar:6650')
    default_input_queue = 'vectors-chunk-load'
    default_output_queue = 'graph-load'
    default_subscriber = 'kg-extract-definitions'

    parser.add_argument(
        '-p', '--pulsar-host',
        default=default_pulsar_host,
        help=f'Pulsar host (default: {default_pulsar_host})',
    )

    parser.add_argument(
        '-i', '--input-queue',
        default=default_input_queue,
        help=f'Input queue (default: {default_input_queue})'
    )

    parser.add_argument(
        '-s', '--subscriber',
        default=default_subscriber,
        help=f'Queue subscriber name (default: {default_subscriber})'
    )

    parser.add_argument(
        '-o', '--output-queue',
        default=default_output_queue,
        help=f'Output queue (default: {default_output_queue})'
    )

    parser.add_argument(
        '-l', '--log-level',
        type=LogLevel,
        default=LogLevel.INFO,
        choices=list(LogLevel),
        # Fixed: help text previously said 'Output queue'
        help=f'Log level (default: info)'
    )

    args = parser.parse_args()

    while True:

        try:

            p = Processor(
                pulsar_host=args.pulsar_host,
                input_queue=args.input_queue,
                output_queue=args.output_queue,
                subscriber=args.subscriber,
                log_level=args.log_level,
            )

            p.run()

        except Exception as e:

            print("Exception:", e, flush=True)
            print("Will retry...", flush=True)

            time.sleep(10)
|
||||
|
||||
3
trustgraph/kg/extract_relationships/__init__.py
Normal file
3
trustgraph/kg/extract_relationships/__init__.py
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
|
||||
from . extract import *
|
||||
|
||||
7
trustgraph/kg/extract_relationships/__main__.py
Executable file
7
trustgraph/kg/extract_relationships/__main__.py
Executable file
|
|
@ -0,0 +1,7 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from . extract import run
|
||||
|
||||
if __name__ == '__main__':
|
||||
run()
|
||||
|
||||
252
trustgraph/kg/extract_relationships/extract.py
Executable file
252
trustgraph/kg/extract_relationships/extract.py
Executable file
|
|
@ -0,0 +1,252 @@
|
|||
|
||||
"""
|
||||
Simple decoder, accepts PDF documents on input, outputs pages from the
|
||||
PDF document as text as separate output objects.
|
||||
"""
|
||||
|
||||
import pulsar
|
||||
from pulsar.schema import JsonSchema
|
||||
from langchain_community.document_loaders import PyPDFLoader
|
||||
import tempfile
|
||||
import base64
|
||||
import os
|
||||
import argparse
|
||||
import rdflib
|
||||
import json
|
||||
import urllib.parse
|
||||
import time
|
||||
|
||||
from ... schema import VectorsChunk, Triple, VectorsAssociation, Source, Value
|
||||
from ... log_level import LogLevel
|
||||
from ... llm_client import LlmClient
|
||||
from ... prompts import to_relationships
|
||||
from ... rdf import RDF_LABEL, TRUSTGRAPH_ENTITIES
|
||||
|
||||
RDF_LABEL_VALUE = Value(value=RDF_LABEL, is_uri=True)
|
||||
|
||||
class Processor:
    """Pulsar worker which extracts entity relationships.

    Consumes VectorsChunk messages, asks the LLM for (subject,
    predicate, object) relationships found in the chunk text, and
    publishes:
      - a Triple per relationship,
      - rdfs:label Triples for each entity,
      - a VectorsAssociation per entity, pairing it with the chunk's
        vectors.
    """

    def __init__(
            self,
            pulsar_host,
            input_queue,
            output_queue,
            vec_queue,
            subscriber,
            log_level,
    ):

        self.client = pulsar.Client(
            pulsar_host,
            logger=pulsar.ConsoleLogger(log_level.to_pulsar())
        )

        self.consumer = self.client.subscribe(
            input_queue, subscriber,
            schema=JsonSchema(VectorsChunk),
        )

        self.producer = self.client.create_producer(
            topic=output_queue,
            schema=JsonSchema(Triple),
        )

        # Separate producer for entity/vector associations.
        self.vec_prod = self.client.create_producer(
            topic=vec_queue,
            schema=JsonSchema(VectorsAssociation),
        )

        self.llm = LlmClient(pulsar_host=pulsar_host)

    def to_uri(self, text):
        """Map an entity name to a URI in the TrustGraph namespace."""

        part = text.replace(" ", "-").lower().encode("utf-8")
        quoted = urllib.parse.quote(part)
        uri = TRUSTGRAPH_ENTITIES + quoted

        return uri

    def get_relationships(self, chunk):
        """Ask the LLM for relationships in the chunk; returns parsed JSON."""

        prompt = to_relationships(chunk)
        resp = self.llm.request(prompt)

        rels = json.loads(resp)

        return rels

    def emit_edge(self, s, p, o):
        """Publish one triple."""

        t = Triple(s=s, p=p, o=o)
        self.producer.send(t)

    def emit_vec(self, ent, vec):
        """Publish one entity/vectors association."""

        r = VectorsAssociation(entity=ent, vectors=vec)
        self.vec_prod.send(r)

    def run(self):
        """Consume chunks forever; ack on success, nack on failure."""

        while True:

            msg = self.consumer.receive()

            try:

                v = msg.value()
                print(f"Indexing {v.source.id}...", flush=True)

                chunk = v.chunk.decode("utf-8")

                # Removed an unused 'rdflib.Graph()' local that was
                # never read or written.

                # Extraction failures are logged but do not nack the
                # message; the chunk is skipped.
                try:

                    rels = self.get_relationships(chunk)
                    print(json.dumps(rels, indent=4), flush=True)

                    for rel in rels:

                        s = rel["subject"]
                        p = rel["predicate"]
                        o = rel["object"]

                        s_uri = self.to_uri(s)
                        s_value = Value(value=str(s_uri), is_uri=True)

                        p_uri = self.to_uri(p)
                        p_value = Value(value=str(p_uri), is_uri=True)

                        # The object is only a URI when the LLM flags
                        # it as an entity; otherwise it is a literal.
                        if rel["object-entity"]:
                            o_uri = self.to_uri(o)
                            o_value = Value(value=str(o_uri), is_uri=True)
                        else:
                            o_value = Value(value=str(o), is_uri=False)

                        self.emit_edge(
                            s_value,
                            p_value,
                            o_value
                        )

                        # Label for s
                        self.emit_edge(
                            s_value,
                            RDF_LABEL_VALUE,
                            Value(value=str(s), is_uri=False)
                        )

                        # Label for p
                        self.emit_edge(
                            p_value,
                            RDF_LABEL_VALUE,
                            Value(value=str(p), is_uri=False)
                        )

                        if rel["object-entity"]:
                            # Label for o
                            self.emit_edge(
                                o_value,
                                RDF_LABEL_VALUE,
                                Value(value=str(o), is_uri=False)
                            )

                        # Associate each entity with the chunk vectors.
                        self.emit_vec(s_value, v.vectors)
                        self.emit_vec(p_value, v.vectors)
                        if rel["object-entity"]:
                            self.emit_vec(o_value, v.vectors)

                except Exception as e:
                    print("Exception: ", e, flush=True)

                print("Done.", flush=True)

                # Acknowledge successful processing of the message
                self.consumer.acknowledge(msg)

            except Exception as e:

                print("Exception: ", e, flush=True)

                # Message failed to be processed
                self.consumer.negative_acknowledge(msg)

    def __del__(self):
        self.client.close()
|
||||
|
||||
def run():
    """CLI entry point: parse options, then run the processor inside a
    retry-forever loop, re-creating it after any failure."""

    parser = argparse.ArgumentParser(
        prog='kg-extract-relationships',
        description=__doc__,
    )

    default_pulsar_host = os.getenv("PULSAR_HOST", 'pulsar://pulsar:6650')
    default_input_queue = 'vectors-chunk-load'
    default_output_queue = 'graph-load'
    default_subscriber = 'kg-extract-relationships'
    default_vector_queue = 'vectors-load'

    parser.add_argument(
        '-p', '--pulsar-host',
        default=default_pulsar_host,
        help=f'Pulsar host (default: {default_pulsar_host})',
    )

    parser.add_argument(
        '-i', '--input-queue',
        default=default_input_queue,
        help=f'Input queue (default: {default_input_queue})'
    )

    parser.add_argument(
        '-s', '--subscriber',
        default=default_subscriber,
        help=f'Queue subscriber name (default: {default_subscriber})'
    )

    parser.add_argument(
        '-o', '--output-queue',
        default=default_output_queue,
        help=f'Output queue (default: {default_output_queue})'
    )

    parser.add_argument(
        '-l', '--log-level',
        type=LogLevel,
        default=LogLevel.INFO,
        choices=list(LogLevel),
        # Fix: help text previously said "Output queue" (copy-paste bug)
        help='Log level (default: info)'
    )

    parser.add_argument(
        '-c', '--vector-queue',
        default=default_vector_queue,
        help=f'Vector output queue (default: {default_vector_queue})'
    )

    args = parser.parse_args()

    while True:

        try:

            p = Processor(
                pulsar_host=args.pulsar_host,
                input_queue=args.input_queue,
                output_queue=args.output_queue,
                vec_queue=args.vector_queue,
                subscriber=args.subscriber,
                log_level=args.log_level,
            )

            p.run()

        except Exception as e:

            print("Exception:", e, flush=True)
            print("Will retry...", flush=True)

            # Back off briefly before reconnecting to Pulsar
            time.sleep(10)
|
||||
|
||||
|
||||
0
trustgraph/llm/__init__.py
Normal file
0
trustgraph/llm/__init__.py
Normal file
3
trustgraph/llm/azure_text/__init__.py
Normal file
3
trustgraph/llm/azure_text/__init__.py
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
|
||||
from . llm import *
|
||||
|
||||
7
trustgraph/llm/azure_text/__main__.py
Executable file
7
trustgraph/llm/azure_text/__main__.py
Executable file
|
|
@ -0,0 +1,7 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from . llm import run
|
||||
|
||||
if __name__ == '__main__':
|
||||
run()
|
||||
|
||||
213
trustgraph/llm/azure_text/llm.py
Executable file
213
trustgraph/llm/azure_text/llm.py
Executable file
|
|
@ -0,0 +1,213 @@
|
|||
|
||||
"""
|
||||
Simple LLM service, performs text prompt completion using an Ollama service.
|
||||
Input is prompt, output is response.
|
||||
"""
|
||||
|
||||
import pulsar
|
||||
from pulsar.schema import JsonSchema
|
||||
import tempfile
|
||||
import base64
|
||||
import os
|
||||
import argparse
|
||||
from langchain_community.llms import Ollama
|
||||
import requests
|
||||
import time
|
||||
import json
|
||||
|
||||
from ... schema import TextCompletionRequest, TextCompletionResponse
|
||||
from ... log_level import LogLevel
|
||||
|
||||
class Processor:
    """Pulsar worker that forwards text-completion requests to an
    Azure-hosted chat-completion endpoint and publishes responses,
    correlated by the message's "id" property."""

    def __init__(
        self,
        pulsar_host,
        input_queue,
        output_queue,
        subscriber,
        log_level,
        endpoint,
        token,
    ):
        """Connect to Pulsar, subscribe to the input queue, and create
        the output producer.

        endpoint: full URL of the Azure model endpoint.
        token: primary/secondary key, AMLToken, or Microsoft Entra ID
        token for the endpoint.
        """

        self.client = pulsar.Client(
            pulsar_host,
            logger=pulsar.ConsoleLogger(log_level.to_pulsar())
        )

        self.consumer = self.client.subscribe(
            input_queue, subscriber,
            schema=JsonSchema(TextCompletionRequest),
        )

        self.producer = self.client.create_producer(
            topic=output_queue,
            schema=JsonSchema(TextCompletionResponse),
        )

        self.endpoint = endpoint
        self.token = token

    def build_prompt(self, system, content):
        """Return the JSON request body for a system+user chat completion."""

        data = {
            "messages": [
                {
                    "role": "system", "content": system
                },
                {
                    "role": "user", "content": content
                }
            ],
            # NOTE(review): 4192 looks like a typo for 4096; left as-is
            # to preserve behavior — confirm against the endpoint's limits.
            "max_tokens": 4192,
            "temperature": 0.2,
            "top_p": 1
        }

        return json.dumps(data)

    def call_llm(self, body):
        """POST the prepared JSON body to the endpoint and return the
        completion text.

        Fix: raises requests.HTTPError on a non-2xx response instead of
        failing later with an opaque KeyError on the error payload.
        """

        headers = {
            'Content-Type': 'application/json',
            'Authorization': f'Bearer {self.token}'
        }

        resp = requests.post(self.endpoint, data=body, headers=headers)
        resp.raise_for_status()
        result = resp.json()

        return result['choices'][0]['message']['content']

    def run(self):
        """Consume requests forever; each message is answered on the
        output queue carrying the same correlation "id" property."""

        while True:

            msg = self.consumer.receive()

            try:

                v = msg.value()

                # Sender-produced correlation ID
                id = msg.properties()["id"]

                print(f"Handling prompt {id}...", flush=True)

                prompt = self.build_prompt(
                    "You are a helpful chatbot",
                    v.prompt
                )

                response = self.call_llm(prompt)

                print("Send response...", flush=True)
                r = TextCompletionResponse(response=response)
                self.producer.send(r, properties={"id": id})

                print("Done.", flush=True)

                # Acknowledge successful processing of the message
                self.consumer.acknowledge(msg)

            except Exception as e:

                print("Exception:", e, flush=True)

                # Message failed to be processed; request redelivery
                self.consumer.negative_acknowledge(msg)

    def __del__(self):
        self.client.close()
|
||||
|
||||
def run():
    """CLI entry point for the Azure text-completion service."""

    parser = argparse.ArgumentParser(
        # Fix: prog previously said 'llm-ollama-text' (copy-paste from the
        # Ollama service); this is the Azure endpoint service.
        prog='llm-azure-text',
        description=__doc__,
    )

    default_pulsar_host = os.getenv("PULSAR_HOST", 'pulsar://pulsar:6650')
    default_input_queue = 'llm-complete-text'
    default_output_queue = 'llm-complete-text-response'
    # NOTE(review): default subscriber looks copy-pasted from the Ollama
    # service; kept as-is because deployments may rely on the
    # subscription name — confirm and rename deliberately if desired.
    default_subscriber = 'llm-ollama-text'

    parser.add_argument(
        '-p', '--pulsar-host',
        default=default_pulsar_host,
        help=f'Pulsar host (default: {default_pulsar_host})',
    )

    parser.add_argument(
        '-i', '--input-queue',
        default=default_input_queue,
        help=f'Input queue (default: {default_input_queue})'
    )

    parser.add_argument(
        '-s', '--subscriber',
        default=default_subscriber,
        help=f'Queue subscriber name (default: {default_subscriber})'
    )

    parser.add_argument(
        '-o', '--output-queue',
        default=default_output_queue,
        help=f'Output queue (default: {default_output_queue})'
    )

    parser.add_argument(
        '-l', '--log-level',
        type=LogLevel,
        default=LogLevel.INFO,
        choices=list(LogLevel),
        # Fix: help text previously said "Output queue"
        help='Log level (default: info)'
    )

    parser.add_argument(
        '-e', '--endpoint',
        help='LLM model endpoint'
    )

    parser.add_argument(
        '-k', '--token',
        help='LLM model token'
    )

    args = parser.parse_args()

    while True:

        try:

            p = Processor(
                pulsar_host=args.pulsar_host,
                input_queue=args.input_queue,
                output_queue=args.output_queue,
                subscriber=args.subscriber,
                log_level=args.log_level,
                endpoint=args.endpoint,
                token=args.token,
            )

            p.run()

        except Exception as e:

            print("Exception:", e, flush=True)
            print("Will retry...", flush=True)

            time.sleep(10)
|
||||
|
||||
|
||||
3
trustgraph/llm/claude_text/__init__.py
Normal file
3
trustgraph/llm/claude_text/__init__.py
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
|
||||
from . llm import *
|
||||
|
||||
7
trustgraph/llm/claude_text/__main__.py
Executable file
7
trustgraph/llm/claude_text/__main__.py
Executable file
|
|
@ -0,0 +1,7 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from . llm import run
|
||||
|
||||
if __name__ == '__main__':
|
||||
run()
|
||||
|
||||
190
trustgraph/llm/claude_text/llm.py
Executable file
190
trustgraph/llm/claude_text/llm.py
Executable file
|
|
@ -0,0 +1,190 @@
|
|||
|
||||
"""
|
||||
Simple LLM service, performs text prompt completion using Claude.
|
||||
Input is prompt, output is response.
|
||||
"""
|
||||
|
||||
import pulsar
|
||||
from pulsar.schema import JsonSchema
|
||||
import tempfile
|
||||
import base64
|
||||
import os
|
||||
import argparse
|
||||
import anthropic
|
||||
import time
|
||||
|
||||
from ... schema import TextCompletionRequest, TextCompletionResponse
|
||||
from ... log_level import LogLevel
|
||||
|
||||
class Processor:
    """Pulsar worker that services text-completion requests with
    Anthropic Claude, correlating request and response by the message's
    "id" property."""

    def __init__(
        self,
        pulsar_host,
        input_queue,
        output_queue,
        subscriber,
        log_level,
        model,
        api_key,
    ):
        """Connect to Pulsar and initialise the Anthropic client.

        model: Claude model identifier.
        api_key: Anthropic API key.
        """

        self.client = pulsar.Client(
            pulsar_host,
            logger=pulsar.ConsoleLogger(log_level.to_pulsar())
        )

        self.consumer = self.client.subscribe(
            input_queue, subscriber,
            schema=JsonSchema(TextCompletionRequest),
        )

        self.producer = self.client.create_producer(
            topic=output_queue,
            schema=JsonSchema(TextCompletionResponse),
        )

        self.model = model

        self.claude = anthropic.Anthropic(api_key=api_key)

        print("Initialised", flush=True)

    def run(self):
        """Consume requests forever, invoking Claude and replying on the
        output queue."""

        while True:

            msg = self.consumer.receive()

            try:

                v = msg.value()

                # Sender-produced correlation ID
                id = msg.properties()["id"]

                print(f"Handling prompt {id}...", flush=True)

                # Fix: dropped the redundant double assignment
                # (was: response = message = self.claude...)
                response = self.claude.messages.create(
                    model=self.model,
                    max_tokens=1000,
                    temperature=0.1,
                    system="You are a helpful chatbot.",
                    messages=[
                        {
                            "role": "user",
                            "content": [
                                {
                                    "type": "text",
                                    "text": v.prompt
                                }
                            ]
                        }
                    ]
                )

                resp = response.content[0].text
                print(resp, flush=True)

                print("Send response...", flush=True)
                r = TextCompletionResponse(response=resp)
                self.producer.send(r, properties={"id": id})

                print("Done.", flush=True)

                # Acknowledge successful processing of the message
                self.consumer.acknowledge(msg)

            except Exception as e:

                print("Exception:", e, flush=True)

                # Message failed to be processed; request redelivery
                self.consumer.negative_acknowledge(msg)

    def __del__(self):
        self.client.close()
|
||||
|
||||
def run():
    """CLI entry point for the Claude text-completion service."""

    parser = argparse.ArgumentParser(
        # Fix: prog previously said 'llm-ollama-text'; the default
        # subscriber 'llm-claude-text' shows the intended name.
        prog='llm-claude-text',
        description=__doc__,
    )

    default_pulsar_host = os.getenv("PULSAR_HOST", 'pulsar://pulsar:6650')
    default_input_queue = 'llm-complete-text'
    default_output_queue = 'llm-complete-text-response'
    default_subscriber = 'llm-claude-text'

    parser.add_argument(
        '-p', '--pulsar-host',
        default=default_pulsar_host,
        help=f'Pulsar host (default: {default_pulsar_host})',
    )

    parser.add_argument(
        '-i', '--input-queue',
        default=default_input_queue,
        help=f'Input queue (default: {default_input_queue})'
    )

    parser.add_argument(
        '-s', '--subscriber',
        default=default_subscriber,
        help=f'Queue subscriber name (default: {default_subscriber})'
    )

    parser.add_argument(
        '-o', '--output-queue',
        default=default_output_queue,
        help=f'Output queue (default: {default_output_queue})'
    )

    parser.add_argument(
        '-l', '--log-level',
        type=LogLevel,
        default=LogLevel.INFO,
        choices=list(LogLevel),
        # Fix: help text previously said "Output queue"
        help='Log level (default: info)'
    )

    parser.add_argument(
        '-m', '--model',
        default="claude-3-5-sonnet-20240620",
        help='LLM model (default: claude-3-5-sonnet-20240620)'
    )

    parser.add_argument(
        '-k', '--api-key',
        help='Claude API key'
    )

    args = parser.parse_args()

    while True:

        try:

            p = Processor(
                pulsar_host=args.pulsar_host,
                input_queue=args.input_queue,
                output_queue=args.output_queue,
                subscriber=args.subscriber,
                log_level=args.log_level,
                model=args.model,
                api_key=args.api_key,
            )

            p.run()

        except Exception as e:

            print("Exception:", e, flush=True)
            print("Will retry...", flush=True)

            time.sleep(10)
|
||||
|
||||
|
||||
3
trustgraph/llm/ollama_text/__init__.py
Normal file
3
trustgraph/llm/ollama_text/__init__.py
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
|
||||
from . llm import *
|
||||
|
||||
7
trustgraph/llm/ollama_text/__main__.py
Executable file
7
trustgraph/llm/ollama_text/__main__.py
Executable file
|
|
@ -0,0 +1,7 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from . llm import run
|
||||
|
||||
if __name__ == '__main__':
|
||||
run()
|
||||
|
||||
169
trustgraph/llm/ollama_text/llm.py
Executable file
169
trustgraph/llm/ollama_text/llm.py
Executable file
|
|
@ -0,0 +1,169 @@
|
|||
|
||||
"""
|
||||
Simple LLM service, performs text prompt completion using an Ollama service.
|
||||
Input is prompt, output is response.
|
||||
"""
|
||||
|
||||
import pulsar
|
||||
from pulsar.schema import JsonSchema
|
||||
import tempfile
|
||||
import base64
|
||||
import os
|
||||
import argparse
|
||||
from langchain_community.llms import Ollama
|
||||
import time
|
||||
|
||||
from ... schema import TextCompletionRequest, TextCompletionResponse
|
||||
from ... log_level import LogLevel
|
||||
|
||||
class Processor:
    """Pulsar worker that services text-completion requests through an
    Ollama server; request/response pairs are correlated by the
    message's "id" property."""

    def __init__(
        self,
        pulsar_host,
        input_queue,
        output_queue,
        subscriber,
        log_level,
        model,
        ollama,
    ):
        """Connect to Pulsar, wire up the queues, and build the
        LangChain Ollama wrapper for *model* served at *ollama*."""

        self.client = pulsar.Client(
            pulsar_host,
            logger=pulsar.ConsoleLogger(log_level.to_pulsar()),
        )

        self.consumer = self.client.subscribe(
            input_queue, subscriber,
            schema=JsonSchema(TextCompletionRequest),
        )

        self.producer = self.client.create_producer(
            topic=output_queue,
            schema=JsonSchema(TextCompletionResponse),
        )

        self.llm = Ollama(base_url=ollama, model=model)

    def run(self):
        """Consume requests forever, invoking the model and replying on
        the output queue; failures are negatively acknowledged so the
        broker redelivers them."""

        while True:

            request = self.consumer.receive()

            try:
                # Sender-produced correlation ID
                correlation = request.properties()["id"]

                print(f"Handling prompt {correlation}...", flush=True)

                completion = self.llm.invoke(request.value().prompt)

                print("Send response...", flush=True)
                self.producer.send(
                    TextCompletionResponse(response=completion),
                    properties={"id": correlation},
                )

                print("Done.", flush=True)

                # Processed successfully — acknowledge
                self.consumer.acknowledge(request)

            except Exception as e:
                print("Exception:", e, flush=True)
                # Processing failed — request redelivery
                self.consumer.negative_acknowledge(request)

    def __del__(self):
        print("Closing")
        self.client.close()
|
||||
|
||||
def run():
    """CLI entry point for the Ollama text-completion service."""

    parser = argparse.ArgumentParser(
        prog='llm-ollama-text',
        description=__doc__,
    )

    default_pulsar_host = os.getenv("PULSAR_HOST", 'pulsar://pulsar:6650')
    default_input_queue = 'llm-complete-text'
    default_output_queue = 'llm-complete-text-response'
    default_subscriber = 'llm-ollama-text'

    parser.add_argument(
        '-p', '--pulsar-host',
        default=default_pulsar_host,
        help=f'Pulsar host (default: {default_pulsar_host})',
    )

    parser.add_argument(
        '-i', '--input-queue',
        default=default_input_queue,
        help=f'Input queue (default: {default_input_queue})'
    )

    parser.add_argument(
        '-s', '--subscriber',
        default=default_subscriber,
        help=f'Queue subscriber name (default: {default_subscriber})'
    )

    parser.add_argument(
        '-o', '--output-queue',
        default=default_output_queue,
        help=f'Output queue (default: {default_output_queue})'
    )

    parser.add_argument(
        '-l', '--log-level',
        type=LogLevel,
        default=LogLevel.INFO,
        choices=list(LogLevel),
        # Fix: help text previously said "Output queue"
        help='Log level (default: info)'
    )

    parser.add_argument(
        '-m', '--model',
        default="gemma2",
        help='LLM model (default: gemma2)'
    )

    parser.add_argument(
        '-r', '--ollama',
        default="http://localhost:11434",
        help='ollama (default: http://localhost:11434)'
    )

    args = parser.parse_args()

    while True:

        try:

            p = Processor(
                pulsar_host=args.pulsar_host,
                input_queue=args.input_queue,
                output_queue=args.output_queue,
                subscriber=args.subscriber,
                log_level=args.log_level,
                model=args.model,
                ollama=args.ollama,
            )

            p.run()

        except Exception as e:

            print("Exception:", e, flush=True)
            print("Will retry...", flush=True)

            time.sleep(10)
|
||||
|
||||
3
trustgraph/llm/vertexai_text/__init__.py
Normal file
3
trustgraph/llm/vertexai_text/__init__.py
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
|
||||
from . llm import *
|
||||
|
||||
7
trustgraph/llm/vertexai_text/__main__.py
Executable file
7
trustgraph/llm/vertexai_text/__main__.py
Executable file
|
|
@ -0,0 +1,7 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from . llm import run
|
||||
|
||||
if __name__ == '__main__':
|
||||
run()
|
||||
|
||||
254
trustgraph/llm/vertexai_text/llm.py
Executable file
254
trustgraph/llm/vertexai_text/llm.py
Executable file
|
|
@ -0,0 +1,254 @@
|
|||
|
||||
"""
|
||||
Simple LLM service, performs text prompt completion using an Ollama service.
|
||||
Input is prompt, output is response.
|
||||
"""
|
||||
|
||||
import pulsar
|
||||
from pulsar.schema import JsonSchema
|
||||
import tempfile
|
||||
import base64
|
||||
import os
|
||||
import argparse
|
||||
import vertexai
|
||||
import time
|
||||
|
||||
from google.oauth2 import service_account
|
||||
import google
|
||||
|
||||
from vertexai.preview.generative_models import (
|
||||
Content,
|
||||
FunctionDeclaration,
|
||||
GenerativeModel,
|
||||
GenerationConfig,
|
||||
HarmCategory,
|
||||
HarmBlockThreshold,
|
||||
Part,
|
||||
Tool,
|
||||
)
|
||||
|
||||
from ... schema import TextCompletionRequest, TextCompletionResponse
|
||||
from ... log_level import LogLevel
|
||||
|
||||
class Processor:
    """Pulsar worker that services text-completion requests with Google
    VertexAI generative models; request/response pairs are correlated by
    the message's "id" property."""

    def __init__(
        self,
        pulsar_host,
        input_queue,
        output_queue,
        subscriber,
        log_level,
        credentials,
        region,
        model,
    ):
        """Connect to Pulsar, initialise VertexAI (optionally with
        explicit service-account credentials), and load *model*.

        credentials: a google.oauth2 service-account Credentials object,
        or None to use ambient (application-default) credentials.
        region: Google Cloud region to run inference in.
        """

        self.client = pulsar.Client(
            pulsar_host,
            logger=pulsar.ConsoleLogger(log_level.to_pulsar())
        )

        self.consumer = self.client.subscribe(
            input_queue, subscriber,
            schema=JsonSchema(TextCompletionRequest),
        )

        self.producer = self.client.create_producer(
            topic=output_queue,
            schema=JsonSchema(TextCompletionResponse),
        )

        # NOTE(review): self.parameters is never read within this class,
        # and its max_output_tokens (8192) disagrees with
        # generation_config below (8191) — confirm whether it is dead
        # code before removing.
        self.parameters = {
            "temperature": 0.2,
            "top_p": 1.0,
            "top_k": 32,
            "candidate_count": 1,
            "max_output_tokens": 8192,
        }

        self.generation_config = GenerationConfig(
            temperature=0.2,
            top_p=1.0,
            top_k=10,
            candidate_count=1,
            max_output_tokens=8191,
        )

        # Block none doesn't seem to work
        block_level = HarmBlockThreshold.BLOCK_ONLY_HIGH
        # block_level = HarmBlockThreshold.BLOCK_NONE

        # Apply the same blocking threshold to every harm category.
        self.safety_settings = {
            HarmCategory.HARM_CATEGORY_HARASSMENT: block_level,
            HarmCategory.HARM_CATEGORY_HATE_SPEECH: block_level,
            HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: block_level,
            HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: block_level,
        }

        print("Initialise VertexAI...", flush=True)

        # Explicit credentials carry their own project ID; otherwise let
        # vertexai pick up the ambient environment.
        if credentials:
            vertexai.init(
                location=region,
                credentials=credentials,
                project=credentials.project_id,
            )
        else:
            vertexai.init(
                location=region
            )

        print(f"Initialise model {model}", flush=True)
        self.llm = GenerativeModel(model)

        print("Initialisation complete", flush=True)

    def run(self):
        """Consume requests forever, generating a completion per message.

        A VertexAI ResourceExhausted (HTTP 429) pauses 15s before
        negatively acknowledging, so redelivery happens after the quota
        window; any other failure is negatively acknowledged at once.
        """

        while True:

            msg = self.consumer.receive()

            try:

                v = msg.value()

                # Sender-produced ID

                id = msg.properties()["id"]

                print(f"Handling prompt {id}...", flush=True)

                prompt = v.prompt

                resp = self.llm.generate_content(
                    prompt, generation_config=self.generation_config,
                    safety_settings=self.safety_settings
                )

                resp = resp.text

                # Strip markdown code fences the model sometimes wraps
                # around its output.
                resp = resp.replace("```json", "")
                resp = resp.replace("```", "")

                print("Send response...", flush=True)
                r = TextCompletionResponse(response=resp)
                self.producer.send(r, properties={"id": id})

                print("Done.", flush=True)

                # Acknowledge successful processing of the message
                self.consumer.acknowledge(msg)

            except google.api_core.exceptions.ResourceExhausted:

                print("429, resource busy, sleeping", flush=True)
                time.sleep(15)
                self.consumer.negative_acknowledge(msg)

            except Exception as e:

                print("Exception:", e, flush=True)

                # Message failed to be processed
                self.consumer.negative_acknowledge(msg)

    def __del__(self):
        self.client.close()
|
||||
|
||||
def run():
    """CLI entry point for the VertexAI text-completion service."""

    parser = argparse.ArgumentParser(
        # Fix: prog previously said 'llm-ollama-text'; the default
        # subscriber 'llm-vertexai-text' shows the intended name.
        prog='llm-vertexai-text',
        description=__doc__,
    )

    default_pulsar_host = os.getenv("PULSAR_HOST", 'pulsar://pulsar:6650')
    default_input_queue = 'llm-complete-text'
    default_output_queue = 'llm-complete-text-response'
    default_subscriber = 'llm-vertexai-text'

    parser.add_argument(
        '-p', '--pulsar-host',
        default=default_pulsar_host,
        help=f'Pulsar host (default: {default_pulsar_host})',
    )

    parser.add_argument(
        '-i', '--input-queue',
        default=default_input_queue,
        help=f'Input queue (default: {default_input_queue})'
    )

    parser.add_argument(
        '-s', '--subscriber',
        default=default_subscriber,
        help=f'Queue subscriber name (default: {default_subscriber})'
    )

    parser.add_argument(
        '-o', '--output-queue',
        default=default_output_queue,
        help=f'Output queue (default: {default_output_queue})'
    )

    parser.add_argument(
        '-l', '--log-level',
        type=LogLevel,
        default=LogLevel.INFO,
        choices=list(LogLevel),
        # Fix: help text previously said "Output queue"
        help='Log level (default: info)'
    )

    parser.add_argument(
        '-m', '--model',
        default="gemini-1.0-pro-001",
        help='LLM model (default: gemini-1.0-pro-001)'
    )
    # Also: text-bison-32k

    parser.add_argument(
        '-k', '--private-key',
        help='Google Cloud private JSON file'
    )

    parser.add_argument(
        '-r', '--region',
        default='us-west1',
        help='Google Cloud region (default: us-west1)',
    )

    args = parser.parse_args()

    # Load explicit service-account credentials when a key file is
    # given; otherwise fall back to ambient credentials.
    if args.private_key:
        credentials = service_account.Credentials.from_service_account_file(
            args.private_key
        )
    else:
        credentials = None

    while True:

        try:

            p = Processor(
                pulsar_host=args.pulsar_host,
                input_queue=args.input_queue,
                output_queue=args.output_queue,
                subscriber=args.subscriber,
                log_level=args.log_level,
                credentials=credentials,
                region=args.region,
                model=args.model,
            )

            p.run()

        except Exception as e:

            print("Exception:", e, flush=True)
            print("Will retry...", flush=True)

            time.sleep(10)
|
||||
|
||||
71
trustgraph/llm_client.py
Normal file
71
trustgraph/llm_client.py
Normal file
|
|
@ -0,0 +1,71 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import pulsar
|
||||
import _pulsar
|
||||
from pulsar.schema import JsonSchema
|
||||
from trustgraph.schema import TextCompletionRequest, TextCompletionResponse
|
||||
import hashlib
|
||||
import uuid
|
||||
|
||||
# Ugly
|
||||
ERROR=_pulsar.LoggerLevel.Error
|
||||
WARN=_pulsar.LoggerLevel.Warn
|
||||
INFO=_pulsar.LoggerLevel.Info
|
||||
DEBUG=_pulsar.LoggerLevel.Debug
|
||||
|
||||
class LlmClient:
    """Synchronous request/response client for the text-completion
    service.

    Sends TextCompletionRequest messages on 'llm-complete-text' and
    blocks until the matching TextCompletionResponse (correlated by an
    "id" message property) arrives on 'llm-complete-text-response'.
    """

    def __init__(
        self, log_level=ERROR, client_id=None,
        pulsar_host="pulsar://pulsar:6650",
    ):
        """Connect to Pulsar; a random client_id is generated when none
        is supplied so each client gets its own subscription."""

        # Fix: identity comparison with None (was `client_id == None`)
        if client_id is None:
            client_id = str(uuid.uuid4())

        self.client = pulsar.Client(
            pulsar_host,
            logger=pulsar.ConsoleLogger(log_level),
        )

        self.producer = self.client.create_producer(
            topic='llm-complete-text',
            schema=JsonSchema(TextCompletionRequest),
            chunking_enabled=True,
        )

        self.consumer = self.client.subscribe(
            'llm-complete-text-response', client_id,
            schema=JsonSchema(TextCompletionResponse),
        )

    def request(self, prompt, timeout=500):
        """Send *prompt* and block until the matching response arrives.

        timeout: per-receive timeout in seconds; pulsar raises on expiry.
        Returns the response text.

        NOTE(review): responses whose "id" does not match are
        acknowledged and discarded, so clients sharing a subscription
        can lose each other's replies — each client should keep a
        unique client_id.
        """

        id = str(uuid.uuid4())

        r = TextCompletionRequest(
            prompt=prompt
        )
        self.producer.send(r, properties={ "id": id })

        while True:

            msg = self.consumer.receive(timeout_millis=timeout * 1000)

            mid = msg.properties()["id"]

            if mid == id:
                resp = msg.value().response
                self.consumer.acknowledge(msg)
                return resp

            # Ignore messages with wrong ID
            self.consumer.acknowledge(msg)

    def __del__(self):
        # Best-effort teardown of producer, consumer and client.
        self.producer.close()
        self.consumer.close()
        self.client.close()
|
||||
|
||||
20
trustgraph/log_level.py
Normal file
20
trustgraph/log_level.py
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
|
||||
from enum import Enum
|
||||
import _pulsar
|
||||
|
||||
class LogLevel(Enum):
    """Command-line log-level choice, mappable to a Pulsar logger level."""

    DEBUG = 'debug'
    INFO = 'info'
    WARN = 'warn'
    ERROR = 'error'

    def __str__(self):
        # argparse renders choices via str(); show the bare value.
        return self.value

    def to_pulsar(self):
        """Translate this level to the matching ``_pulsar.LoggerLevel``."""
        mapping = {
            LogLevel.DEBUG: _pulsar.LoggerLevel.Debug,
            LogLevel.INFO: _pulsar.LoggerLevel.Info,
            LogLevel.WARN: _pulsar.LoggerLevel.Warn,
            LogLevel.ERROR: _pulsar.LoggerLevel.Error,
        }
        if self not in mapping:
            raise RuntimeError("Log level mismatch")
        return mapping[self]
|
||||
|
||||
138
trustgraph/prompts.py
Normal file
138
trustgraph/prompts.py
Normal file
|
|
@ -0,0 +1,138 @@
|
|||
|
||||
def turtle_extract(text):
    """Build a prompt asking an LLM to extract knowledge from *text* as
    Turtle RDF, declaring new URIs under the trustgraph.ai prefix."""

    return f"""<instructions>
Study the following text and extract knowledge as
information in Turtle RDF format.
When declaring any new URIs, use <https://trustgraph.ai/e#> prefix,
and declare appropriate namespace tags.
</instructions>

<text>
{text}
</text>

<requirements>
Do not use placeholders for information you do not know.
You will respond only with raw Turtle RDF data. Do not provide
explanations. Do not use special characters in the abstract text. The
abstract must be written as plain text. Do not add markdown formatting.
</requirements>"""
|
||||
|
||||
def scholar(text):
    """Build an "Article"-style extraction prompt asking the LLM for a
    title, abstract, keywords and people, returned as a JSON object
    matching the embedded example."""

    # Build the prompt for Article style extraction
    # (plain string, not an f-string — the braces are literal JSON)
    jsonexample = """{
    "title": "Article title here",
    "abstract": "Abstract text here",
    "keywords": ["keyword1", "keyword2", "keyword3"],
    "people": ["person1", "person2", "person3"]
}"""

    promptscholar = f"""Your task is to read the provided text and write a scholarly abstract to fully explain all of the concepts described in the provided text. The abstract must include all conceptual details.
<text>
{text}
</text>
<instructions>

- Structure: For the provided text, write a title, abstract, keywords,
  and people for the concepts found in the provided text. Ignore
  document formatting in the provided text such as table of contents,
  headers, footers, section metadata, and URLs.
- Focus on Concepts The abstract must focus on concepts found in the
  provided text. The abstract must be factually accurate. Do not
  write any concepts not found in the provided text. Do not
  speculate. Do not omit any conceptual details.
- Completeness: The abstract must capture all topics the reader will
  need to understand the concepts found in the provided text. Describe
  all terms, definitions, entities, people, events, concepts,
  conceptual relationships, and any other topics necessary for the
  reader to understand the concepts of the provided text.

- Format: Respond in the form of a valid JSON object.
</instructions>
<example>
{jsonexample}
</example>
<requirements>
You will respond only with the JSON object. Do not provide
explanations. Do not use special characters in the abstract text. The
abstract must be written as plain text.
</requirements>"""

    return promptscholar
|
||||
|
||||
def to_json_ld(text):
    """Build a prompt asking an LLM to express facts found in *text* as
    JSON-LD using schema.org vocabulary."""

    return f"""<instructions>
Study the following text and output any facts you discover in
well-structured JSON-LD format.
Use any schema you understand from schema.org to describe the facts.
</instructions>

<text>
{text}
</text>

<requirements>
You will respond only with raw JSON-LD data in JSON format. Do not provide
explanations. Do not use special characters in the abstract text. The
abstract must be written as plain text. Do not add markdown formatting
or headers or prefixes. Do not use information which is not present in
the input text.
</requirements>"""
|
||||
|
||||
|
||||
def to_relationships(text):
    """Build a prompt asking an LLM to extract subject/predicate/object
    relationships from *text*, returned as a JSON array of objects.

    Fix: corrected the "arary" typo in the prompt text sent to the model.
    """

    prompt = f"""<instructions>
Study the following text and derive entity relationships. For each
relationship, derive the subject, predicate and object of the relationship.
Output relationships in JSON format as an array of objects with fields:
- subject: the subject of the relationship
- predicate: the predicate
- object: the object of the relationship
- object-entity: false if the object is a simple data type: name, value or date. true if it is an entity.
</instructions>

<text>
{text}
</text>

<requirements>
You will respond only with raw JSON format data. Do not provide
explanations. Do not use special characters in the abstract text. The
abstract must be written as plain text. Do not add markdown formatting
or headers or prefixes.
</requirements>"""

    return prompt
|
||||
|
||||
def to_definitions(text):
    """Build an LLM prompt that extracts entity definitions from *text*,
    returned as a raw JSON array of {{entity, definition}} objects."""

    # Fixes: "arary" -> "array"; "abstract will be" -> "abstract must be"
    # for consistency with the sibling prompt builders.
    prompt = f"""<instructions>
Study the following text and derive definitions for any discovered entities.
Do not provide definitions for entities whose definitions are incomplete
or unknown.
Output relationships in JSON format as an array of objects with fields:
- entity: the name of the entity
- definition: English text which defines the entity
</instructions>

<text>
{text}
</text>

<requirements>
You will respond only with raw JSON format data. Do not provide
explanations. Do not use special characters in the abstract text. The
abstract must be written as plain text. Do not add markdown formatting
or headers or prefixes. Do not include null or unknown definitions.
</requirements>"""

    return prompt
|
||||
|
||||
0
trustgraph/rag/__init__.py
Normal file
0
trustgraph/rag/__init__.py
Normal file
3
trustgraph/rag/graph/__init__.py
Normal file
3
trustgraph/rag/graph/__init__.py
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
|
||||
from . rag import *
|
||||
|
||||
7
trustgraph/rag/graph/__main__.py
Executable file
7
trustgraph/rag/graph/__main__.py
Executable file
|
|
@ -0,0 +1,7 @@
|
|||
#!/usr/bin/env python3
# Entry point: `python -m trustgraph.rag.graph` starts the graph-RAG service.

from . rag import run

if __name__ == '__main__':
    run()
|
||||
|
||||
172
trustgraph/rag/graph/rag.py
Executable file
172
trustgraph/rag/graph/rag.py
Executable file
|
|
@ -0,0 +1,172 @@
|
|||
|
||||
"""
|
||||
Simple RAG service, performs query using graph RAG an LLM.
|
||||
Input is query, output is response.
|
||||
"""
|
||||
|
||||
import pulsar
|
||||
from pulsar.schema import JsonSchema
|
||||
import tempfile
|
||||
import base64
|
||||
import os
|
||||
import argparse
|
||||
import time
|
||||
|
||||
from ... schema import GraphRagQuery, GraphRagResponse
|
||||
from ... log_level import LogLevel
|
||||
from ... graph_rag import GraphRag
|
||||
|
||||
class Processor:
    """Graph-RAG query service: consumes GraphRagQuery messages from a
    Pulsar topic, answers each via GraphRag, and publishes a
    GraphRagResponse carrying the sender's correlation id."""

    def __init__(
        self,
        pulsar_host,
        input_queue,
        output_queue,
        subscriber,
        log_level,
        graph_hosts,
        vector_store,
    ):
        """Connect to Pulsar and build the consumer, producer and RAG engine.

        :param pulsar_host: Pulsar service URL, e.g. pulsar://host:6650
        :param input_queue: topic carrying GraphRagQuery messages
        :param output_queue: topic for GraphRagResponse messages
        :param subscriber: Pulsar subscription name
        :param log_level: LogLevel, mapped onto the Pulsar client logger
        :param graph_hosts: graph store hosts handed through to GraphRag
        :param vector_store: vector store URI handed through to GraphRag
        """

        self.client = pulsar.Client(
            pulsar_host,
            logger=pulsar.ConsoleLogger(log_level.to_pulsar())
        )

        # Queries arrive as JSON-encoded GraphRagQuery records.
        self.consumer = self.client.subscribe(
            input_queue, subscriber,
            schema=JsonSchema(GraphRagQuery),
        )

        # Responses leave as JSON-encoded GraphRagResponse records.
        self.producer = self.client.create_producer(
            topic=output_queue,
            schema=JsonSchema(GraphRagResponse),
        )

        self.rag = GraphRag(
            pulsar_host=pulsar_host,
            graph_hosts=graph_hosts,
            vector_store=vector_store,
            verbose=True,
        )

    def run(self):
        """Blocking consume loop; failed messages are negatively
        acknowledged so Pulsar redelivers them."""

        while True:

            msg = self.consumer.receive()

            try:

                v = msg.value()

                # Sender-produced ID, echoed back on the response so the
                # caller can correlate request and reply.
                # NOTE(review): a missing "id" property raises KeyError
                # here, so such messages are nacked and redelivered forever.
                id = msg.properties()["id"]

                print(f"Handling input {id}...", flush=True)

                response = self.rag.query(v.query)

                print("Send response...", flush=True)
                r = GraphRagResponse(response = response)
                self.producer.send(r, properties={"id": id})

                print("Done.", flush=True)

                # Acknowledge successful processing of the message
                self.consumer.acknowledge(msg)

            except Exception as e:

                print("Exception:", e, flush=True)

                # Message failed to be processed; negative ack requests
                # redelivery under the subscription's policy.
                self.consumer.negative_acknowledge(msg)

    def __del__(self):
        # Best-effort cleanup — __del__ timing is interpreter-dependent,
        # so the Pulsar client may only be closed at process exit.
        print("Closing", flush=True)
        self.client.close()
|
||||
|
||||
def run():
    """Parse command-line arguments and run the graph-RAG query service.

    Retries forever: if the processor raises (e.g. Pulsar is not yet
    reachable), waits 10 seconds and reconnects.
    """

    parser = argparse.ArgumentParser(
        # Fix: prog was 'llm-ollama-text', a copy/paste slip from
        # another service in this repo.
        prog='graph-rag',
        description=__doc__,
    )

    default_pulsar_host = os.getenv("PULSAR_HOST", 'pulsar://pulsar:6650')
    default_input_queue = 'graph-rag-query'
    default_output_queue = 'graph-rag-response'
    default_subscriber = 'graph-rag'

    parser.add_argument(
        '-p', '--pulsar-host',
        default=default_pulsar_host,
        help=f'Pulsar host (default: {default_pulsar_host})',
    )

    parser.add_argument(
        '-i', '--input-queue',
        default=default_input_queue,
        help=f'Input queue (default: {default_input_queue})'
    )

    parser.add_argument(
        '-s', '--subscriber',
        default=default_subscriber,
        help=f'Queue subscriber name (default: {default_subscriber})'
    )

    parser.add_argument(
        '-o', '--output-queue',
        default=default_output_queue,
        help=f'Output queue (default: {default_output_queue})'
    )

    parser.add_argument(
        '-l', '--log-level',
        type=LogLevel,
        default=LogLevel.INFO,
        choices=list(LogLevel),
        # Fix: help text said 'Output queue' — copy/paste slip.
        help='Log level (default: info)'
    )

    parser.add_argument(
        '-g', '--graph-hosts',
        default='cassandra',
        help='Graph hosts, comma separated (default: cassandra)'
    )

    parser.add_argument(
        '-v', '--vector-store',
        default='http://milvus:19530',
        help='Vector host (default: http://milvus:19530)'
    )

    args = parser.parse_args()

    while True:

        try:

            p = Processor(
                pulsar_host=args.pulsar_host,
                input_queue=args.input_queue,
                output_queue=args.output_queue,
                subscriber=args.subscriber,
                log_level=args.log_level,
                graph_hosts=args.graph_hosts.split(","),
                vector_store=args.vector_store,
            )

            p.run()

        except Exception as e:

            print("Exception:", e, flush=True)
            print("Will retry...", flush=True)

            # Back off before reconnecting so a down broker is not hammered.
            time.sleep(10)
|
||||
|
||||
6
trustgraph/rdf.py
Normal file
6
trustgraph/rdf.py
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
|
||||
# Well-known predicate URIs used when emitting triples.
RDF_LABEL = "http://www.w3.org/2000/01/rdf-schema#label"
DEFINITION = "http://www.w3.org/2004/02/skos/core#definition"

# Namespace under which TrustGraph mints its own entity URIs.
TRUSTGRAPH_ENTITIES = "http://trustgraph.ai/e/"
|
||||
|
||||
67
trustgraph/schema.py
Normal file
67
trustgraph/schema.py
Normal file
|
|
@ -0,0 +1,67 @@
|
|||
|
||||
from pulsar.schema import Record, Bytes, String, Boolean, Integer, Array, Double
|
||||
|
||||
from enum import Enum
|
||||
|
||||
#class Command(Enum):
|
||||
# reindex = 1
|
||||
|
||||
#class IndexCommand(Record):
|
||||
# command = Command
|
||||
|
||||
class Value(Record):
    """One graph term: a URI or a plain literal value."""
    # The term's text (URI string or literal lexical form).
    value = String()
    # Flags whether `value` is a URI (per the field name).
    is_uri = Boolean()
    # NOTE(review): semantics not shown here — presumably a datatype tag;
    # confirm against producers.
    type = String()
|
||||
|
||||
class Source(Record):
    """Provenance metadata identifying an originating document."""
    # Where the document came from.
    source = String()
    # Identifier of the document.
    id = String()
    # Human-readable title.
    title = String()
|
||||
|
||||
class Document(Record):
    """A raw (binary) document plus its provenance."""
    source = Source()
    data = Bytes()
|
||||
|
||||
class TextDocument(Record):
    """A text document plus its provenance (text carried as bytes)."""
    source = Source()
    text = Bytes()
|
||||
|
||||
class Chunk(Record):
    """One chunk of a larger document, plus its provenance."""
    source = Source()
    chunk = Bytes()
|
||||
|
||||
class VectorsChunk(Record):
    """A document chunk together with its embedding vectors."""
    source = Source()
    # One or more embedding vectors (list of lists of doubles).
    vectors = Array(Array(Double()))
    chunk = Bytes()
|
||||
|
||||
class VectorsAssociation(Record):
    """Embedding vectors associated with a graph entity."""
    source = Source()
    # One or more embedding vectors (list of lists of doubles).
    vectors = Array(Array(Double()))
    # The entity these vectors describe.
    entity = Value()
|
||||
|
||||
class Triple(Record):
    """A subject/predicate/object triple plus its provenance."""
    source = Source()
    s = Value()
    p = Value()
    o = Value()
|
||||
|
||||
class TextCompletionRequest(Record):
    """Request asking an LLM to complete the given prompt."""
    prompt = String()
|
||||
|
||||
class TextCompletionResponse(Record):
    """LLM completion text for a TextCompletionRequest."""
    response = String()
|
||||
|
||||
class EmbeddingsRequest(Record):
    """Request asking for embeddings of the given text."""
    text = String()
|
||||
|
||||
class EmbeddingsResponse(Record):
    """Embedding vectors computed for an EmbeddingsRequest."""
    vectors = Array(Array(Double()))
|
||||
|
||||
class GraphRagQuery(Record):
    """A natural-language query for the graph-RAG service."""
    query = String()
|
||||
|
||||
class GraphRagResponse(Record):
    """The graph-RAG service's answer to a GraphRagQuery."""
    response = String()
|
||||
|
||||
108
trustgraph/trustgraph.py
Normal file
108
trustgraph/trustgraph.py
Normal file
|
|
@ -0,0 +1,108 @@
|
|||
|
||||
from cassandra.cluster import Cluster
|
||||
from cassandra.auth import PlainTextAuthProvider
|
||||
|
||||
class TrustGraph:
    """Minimal Cassandra-backed triple store.

    Triples (s, p, o) live in keyspace `trustgraph`, table `triples`,
    with primary key (s, p) and secondary indexes on p and o.
    """

    def __init__(self, hosts=None):
        """Connect to Cassandra and ensure the schema exists.

        :param hosts: Cassandra contact points (default: ["localhost"])
        """

        if hosts is None:
            hosts = ["localhost"]

        self.cluster = Cluster(hosts)
        self.session = self.cluster.connect()

        self.init()

    def clear(self):
        """Drop the entire keyspace, then recreate an empty schema."""

        self.session.execute("""
            drop keyspace if exists trustgraph;
        """);

        self.init()

    def init(self):
        """Create the keyspace, table and indexes (idempotent)."""

        self.session.execute("""
            create keyspace if not exists trustgraph
                with replication = {
                   'class' : 'SimpleStrategy',
                   'replication_factor' : 1
                };
        """);

        self.session.set_keyspace('trustgraph')

        self.session.execute("""
            create table if not exists triples (
                s text,
                p text,
                o text,
                PRIMARY KEY (s, p)
            );
        """);

        # Secondary indexes so lookups by predicate or object work.
        self.session.execute("""
            create index if not exists triples_p
            ON triples (p);
        """);

        self.session.execute("""
            create index if not exists triples_o
            ON triples (o);
        """);

    def insert(self, s, p, o):
        """Insert one triple (upsert semantics on the (s, p) key)."""
        self.session.execute(
            "insert into triples (s, p, o) values (%s, %s, %s)",
            (s, p, o)
        )

    # In the getters below LIMIT cannot be bound as a driver parameter,
    # so it is interpolated — coerced to int() to keep the CQL safe.

    def get_all(self, limit=50):
        """Return up to *limit* (s, p, o) rows."""
        return self.session.execute(
            f"select s, p, o from triples limit {int(limit)}"
        )

    def get_s(self, s, limit=10):
        """Return (p, o) rows for subject *s*."""
        # Fix: *limit* was accepted but never applied to the query.
        return self.session.execute(
            f"select p, o from triples where s = %s limit {int(limit)}",
            (s,)
        )

    def get_p(self, p, limit=10):
        """Return (s, o) rows for predicate *p*."""
        return self.session.execute(
            f"select s, o from triples where p = %s limit {int(limit)}",
            (p,)
        )

    def get_o(self, o, limit=10):
        """Return (s, p) rows for object *o*."""
        return self.session.execute(
            f"select s, p from triples where o = %s limit {int(limit)}",
            (o,)
        )

    def get_sp(self, s, p, limit=10):
        """Return o values for a (s, p) pair."""
        return self.session.execute(
            f"select o from triples where s = %s and p = %s limit {int(limit)}",
            (s, p)
        )

    def get_po(self, p, o, limit=10):
        """Return s values for a (p, o) pair."""
        # Fix: CQL requires ALLOW FILTERING to come after LIMIT; the
        # original order ("allow filtering limit N") is a syntax error.
        return self.session.execute(
            f"select s from triples where p = %s and o = %s limit {int(limit)} allow filtering",
            (p, o)
        )

    def get_os(self, o, s, limit=10):
        """Return s values for an (o, s) pair."""
        # NOTE(review): relies on the secondary index on o; confirm this
        # predicate combination is accepted without ALLOW FILTERING.
        return self.session.execute(
            f"select s from triples where o = %s and s = %s limit {int(limit)}",
            (o, s)
        )

    def get_spo(self, s, p, o, limit=10):
        """Return a row per exact (s, p, o) match (existence check)."""
        return self.session.execute(
            f"""select s as x from triples where s = %s and p = %s and o = %s limit {int(limit)}""",
            (s, p, o)
        )
|
||||
0
trustgraph/vector/__init__.py
Normal file
0
trustgraph/vector/__init__.py
Normal file
3
trustgraph/vector/milvus_write/__init__.py
Normal file
3
trustgraph/vector/milvus_write/__init__.py
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
|
||||
from . write import *
|
||||
|
||||
7
trustgraph/vector/milvus_write/__main__.py
Executable file
7
trustgraph/vector/milvus_write/__main__.py
Executable file
|
|
@ -0,0 +1,7 @@
|
|||
#!/usr/bin/env python3
# Entry point: `python -m trustgraph.vector.milvus_write` starts the
# vector writer service.

from . write import run

if __name__ == '__main__':
    run()
|
||||
|
||||
136
trustgraph/vector/milvus_write/write.py
Executable file
136
trustgraph/vector/milvus_write/write.py
Executable file
|
|
@ -0,0 +1,136 @@
|
|||
|
||||
"""
|
||||
Simple decoder, accepts PDF documents on input, outputs pages from the
|
||||
PDF document as text as separate output objects.
|
||||
"""
|
||||
|
||||
import pulsar
|
||||
from pulsar.schema import JsonSchema
|
||||
from langchain_community.document_loaders import PyPDFLoader
|
||||
import tempfile
|
||||
import base64
|
||||
import os
|
||||
import argparse
|
||||
import time
|
||||
|
||||
from ... schema import VectorsAssociation
|
||||
from ... log_level import LogLevel
|
||||
from ... edge_map import VectorStore
|
||||
|
||||
class Processor:
    """Consumes VectorsAssociation messages from Pulsar and inserts each
    (vector, entity) pair into the VectorStore."""

    def __init__(
        self,
        pulsar_host,
        input_queue,
        subscriber,
        store_uri,
        log_level,
    ):
        """Connect to Pulsar and to the vector store.

        :param pulsar_host: Pulsar service URL, e.g. pulsar://host:6650
        :param input_queue: topic carrying VectorsAssociation messages
        :param subscriber: Pulsar subscription name
        :param store_uri: vector store URI (e.g. a Milvus endpoint)
        :param log_level: LogLevel, mapped onto the Pulsar client logger
        """

        self.client = pulsar.Client(
            pulsar_host,
            logger=pulsar.ConsoleLogger(log_level.to_pulsar())
        )

        # Messages arrive as JSON-encoded VectorsAssociation records.
        self.consumer = self.client.subscribe(
            input_queue, subscriber,
            schema=JsonSchema(VectorsAssociation),
        )

        self.vecstore = VectorStore(store_uri)

    def run(self):
        """Blocking consume loop; failed messages are negatively
        acknowledged so Pulsar redelivers them."""

        while True:

            msg = self.consumer.receive()

            try:

                v = msg.value()

                # Skip associations with an empty entity value; otherwise
                # store every vector in the message against the entity.
                if v.entity.value != "":
                    for vec in v.vectors:
                        self.vecstore.insert(vec, v.entity.value)

                # Acknowledge successful processing of the message
                self.consumer.acknowledge(msg)

            except Exception as e:

                print("Exception:", e, flush=True)

                # Message failed to be processed; negative ack requests
                # redelivery under the subscription's policy.
                self.consumer.negative_acknowledge(msg)

    def __del__(self):
        # Best-effort cleanup — __del__ timing is interpreter-dependent.
        self.client.close()
|
||||
|
||||
def run():
    """Parse command-line arguments and run the Milvus vector writer.

    Retries forever: if the processor raises (e.g. Pulsar or Milvus is
    not yet reachable), waits 10 seconds and reconnects.
    """

    parser = argparse.ArgumentParser(
        # Fix: prog was 'pdf-decoder', a copy/paste slip from another
        # service in this repo.
        prog='vector-write-milvus',
        description=__doc__,
    )

    default_pulsar_host = os.getenv("PULSAR_HOST", 'pulsar://pulsar:6650')
    default_input_queue = 'vectors-load'
    default_subscriber = 'vector-write-milvus'

    parser.add_argument(
        '-p', '--pulsar-host',
        default=default_pulsar_host,
        help=f'Pulsar host (default: {default_pulsar_host})',
    )

    parser.add_argument(
        '-i', '--input-queue',
        default=default_input_queue,
        help=f'Input queue (default: {default_input_queue})'
    )

    parser.add_argument(
        '-s', '--subscriber',
        default=default_subscriber,
        help=f'Queue subscriber name (default: {default_subscriber})'
    )

    parser.add_argument(
        '-l', '--log-level',
        type=LogLevel,
        default=LogLevel.INFO,
        choices=list(LogLevel),
        # Fix: help text said 'Output queue' — copy/paste slip.
        help='Log level (default: info)'
    )

    parser.add_argument(
        '-t', '--store-uri',
        default="http://localhost:19530",
        help='Milvus store URI (default: http://localhost:19530)'
    )

    args = parser.parse_args()

    while True:

        try:

            p = Processor(
                pulsar_host=args.pulsar_host,
                input_queue=args.input_queue,
                subscriber=args.subscriber,
                store_uri=args.store_uri,
                log_level=args.log_level,
            )

            p.run()

        except Exception as e:

            print("Exception:", e, flush=True)
            print("Will retry...", flush=True)

            # Back off before reconnecting so a down broker is not hammered.
            time.sleep(10)
|
||||
|
||||
|
||||
Loading…
Add table
Add a link
Reference in a new issue