diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
index c182578a..a7f47697 100644
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -71,35 +71,3 @@ jobs:
- name: Push containers
run: make push VERSION=${{ steps.version.outputs.VERSION }}
- release-bundle:
-
- name: Upload release bundle
- runs-on: ubuntu-24.04
- permissions:
- contents: write
- id-token: write
- environment:
- name: release
-
- steps:
-
- - name: Checkout
- uses: actions/checkout@v4
-
- - name: Install build dependencies
- run: pip3 install jsonnet
-
- - name: Get version
- id: version
- run: echo VERSION=$(git describe --exact-match --tags | sed 's/^v//') >> $GITHUB_OUTPUT
-
- - name: Create deploy bundle
- run: templates/generate-all deploy.zip ${{ steps.version.outputs.VERSION }}
-
- - uses: ncipollo/release-action@v1
- with:
- artifacts: deploy.zip
- generateReleaseNotes: true
- makeLatest: false
- prerelease: true
- skipIfReleaseExists: true
diff --git a/LICENSE b/LICENSE
index c6f01c63..6b0b1270 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,660 +1,203 @@
-# GNU AFFERO GENERAL PUBLIC LICENSE
-
-Version 3, 19 November 2007
-
-Copyright (C) 2007 Free Software Foundation, Inc.
-<https://fsf.org/>
-
-Everyone is permitted to copy and distribute verbatim copies of this
-license document, but changing it is not allowed.
-
-## Preamble
-
-The GNU Affero General Public License is a free, copyleft license for
-software and other kinds of works, specifically designed to ensure
-cooperation with the community in the case of network server software.
-
-The licenses for most software and other practical works are designed
-to take away your freedom to share and change the works. By contrast,
-our General Public Licenses are intended to guarantee your freedom to
-share and change all versions of a program--to make sure it remains
-free software for all its users.
-
-When we speak of free software, we are referring to freedom, not
-price. Our General Public Licenses are designed to make sure that you
-have the freedom to distribute copies of free software (and charge for
-them if you wish), that you receive source code or can get it if you
-want it, that you can change the software or use pieces of it in new
-free programs, and that you know you can do these things.
-
-Developers that use our General Public Licenses protect your rights
-with two steps: (1) assert copyright on the software, and (2) offer
-you this License which gives you legal permission to copy, distribute
-and/or modify the software.
-
-A secondary benefit of defending all users' freedom is that
-improvements made in alternate versions of the program, if they
-receive widespread use, become available for other developers to
-incorporate. Many developers of free software are heartened and
-encouraged by the resulting cooperation. However, in the case of
-software used on network servers, this result may fail to come about.
-The GNU General Public License permits making a modified version and
-letting the public access it on a server without ever releasing its
-source code to the public.
-
-The GNU Affero General Public License is designed specifically to
-ensure that, in such cases, the modified source code becomes available
-to the community. It requires the operator of a network server to
-provide the source code of the modified version running there to the
-users of that server. Therefore, public use of a modified version, on
-a publicly accessible server, gives the public access to the source
-code of the modified version.
-
-An older license, called the Affero General Public License and
-published by Affero, was designed to accomplish similar goals. This is
-a different license, not a version of the Affero GPL, but Affero has
-released a new version of the Affero GPL which permits relicensing
-under this license.
-
-The precise terms and conditions for copying, distribution and
-modification follow.
-
-## TERMS AND CONDITIONS
-
-### 0. Definitions.
-
-"This License" refers to version 3 of the GNU Affero General Public
-License.
-
-"Copyright" also means copyright-like laws that apply to other kinds
-of works, such as semiconductor masks.
-
-"The Program" refers to any copyrightable work licensed under this
-License. Each licensee is addressed as "you". "Licensees" and
-"recipients" may be individuals or organizations.
-
-To "modify" a work means to copy from or adapt all or part of the work
-in a fashion requiring copyright permission, other than the making of
-an exact copy. The resulting work is called a "modified version" of
-the earlier work or a work "based on" the earlier work.
-
-A "covered work" means either the unmodified Program or a work based
-on the Program.
-
-To "propagate" a work means to do anything with it that, without
-permission, would make you directly or secondarily liable for
-infringement under applicable copyright law, except executing it on a
-computer or modifying a private copy. Propagation includes copying,
-distribution (with or without modification), making available to the
-public, and in some countries other activities as well.
-
-To "convey" a work means any kind of propagation that enables other
-parties to make or receive copies. Mere interaction with a user
-through a computer network, with no transfer of a copy, is not
-conveying.
-
-An interactive user interface displays "Appropriate Legal Notices" to
-the extent that it includes a convenient and prominently visible
-feature that (1) displays an appropriate copyright notice, and (2)
-tells the user that there is no warranty for the work (except to the
-extent that warranties are provided), that licensees may convey the
-work under this License, and how to view a copy of this License. If
-the interface presents a list of user commands or options, such as a
-menu, a prominent item in the list meets this criterion.
-
-### 1. Source Code.
-
-The "source code" for a work means the preferred form of the work for
-making modifications to it. "Object code" means any non-source form of
-a work.
-
-A "Standard Interface" means an interface that either is an official
-standard defined by a recognized standards body, or, in the case of
-interfaces specified for a particular programming language, one that
-is widely used among developers working in that language.
-
-The "System Libraries" of an executable work include anything, other
-than the work as a whole, that (a) is included in the normal form of
-packaging a Major Component, but which is not part of that Major
-Component, and (b) serves only to enable use of the work with that
-Major Component, or to implement a Standard Interface for which an
-implementation is available to the public in source code form. A
-"Major Component", in this context, means a major essential component
-(kernel, window system, and so on) of the specific operating system
-(if any) on which the executable work runs, or a compiler used to
-produce the work, or an object code interpreter used to run it.
-
-The "Corresponding Source" for a work in object code form means all
-the source code needed to generate, install, and (for an executable
-work) run the object code and to modify the work, including scripts to
-control those activities. However, it does not include the work's
-System Libraries, or general-purpose tools or generally available free
-programs which are used unmodified in performing those activities but
-which are not part of the work. For example, Corresponding Source
-includes interface definition files associated with source files for
-the work, and the source code for shared libraries and dynamically
-linked subprograms that the work is specifically designed to require,
-such as by intimate data communication or control flow between those
-subprograms and other parts of the work.
-
-The Corresponding Source need not include anything that users can
-regenerate automatically from other parts of the Corresponding Source.
-
-The Corresponding Source for a work in source code form is that same
-work.
-
-### 2. Basic Permissions.
-
-All rights granted under this License are granted for the term of
-copyright on the Program, and are irrevocable provided the stated
-conditions are met. This License explicitly affirms your unlimited
-permission to run the unmodified Program. The output from running a
-covered work is covered by this License only if the output, given its
-content, constitutes a covered work. This License acknowledges your
-rights of fair use or other equivalent, as provided by copyright law.
-
-You may make, run and propagate covered works that you do not convey,
-without conditions so long as your license otherwise remains in force.
-You may convey covered works to others for the sole purpose of having
-them make modifications exclusively for you, or provide you with
-facilities for running those works, provided that you comply with the
-terms of this License in conveying all material for which you do not
-control copyright. Those thus making or running the covered works for
-you must do so exclusively on your behalf, under your direction and
-control, on terms that prohibit them from making any copies of your
-copyrighted material outside their relationship with you.
-
-Conveying under any other circumstances is permitted solely under the
-conditions stated below. Sublicensing is not allowed; section 10 makes
-it unnecessary.
-
-### 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
-
-No covered work shall be deemed part of an effective technological
-measure under any applicable law fulfilling obligations under article
-11 of the WIPO copyright treaty adopted on 20 December 1996, or
-similar laws prohibiting or restricting circumvention of such
-measures.
-
-When you convey a covered work, you waive any legal power to forbid
-circumvention of technological measures to the extent such
-circumvention is effected by exercising rights under this License with
-respect to the covered work, and you disclaim any intention to limit
-operation or modification of the work as a means of enforcing, against
-the work's users, your or third parties' legal rights to forbid
-circumvention of technological measures.
-
-### 4. Conveying Verbatim Copies.
-
-You may convey verbatim copies of the Program's source code as you
-receive it, in any medium, provided that you conspicuously and
-appropriately publish on each copy an appropriate copyright notice;
-keep intact all notices stating that this License and any
-non-permissive terms added in accord with section 7 apply to the code;
-keep intact all notices of the absence of any warranty; and give all
-recipients a copy of this License along with the Program.
-
-You may charge any price or no price for each copy that you convey,
-and you may offer support or warranty protection for a fee.
-
-### 5. Conveying Modified Source Versions.
-
-You may convey a work based on the Program, or the modifications to
-produce it from the Program, in the form of source code under the
-terms of section 4, provided that you also meet all of these
-conditions:
-
-- a) The work must carry prominent notices stating that you modified
- it, and giving a relevant date.
-- b) The work must carry prominent notices stating that it is
- released under this License and any conditions added under
- section 7. This requirement modifies the requirement in section 4
- to "keep intact all notices".
-- c) You must license the entire work, as a whole, under this
- License to anyone who comes into possession of a copy. This
- License will therefore apply, along with any applicable section 7
- additional terms, to the whole of the work, and all its parts,
- regardless of how they are packaged. This License gives no
- permission to license the work in any other way, but it does not
- invalidate such permission if you have separately received it.
-- d) If the work has interactive user interfaces, each must display
- Appropriate Legal Notices; however, if the Program has interactive
- interfaces that do not display Appropriate Legal Notices, your
- work need not make them do so.
-
-A compilation of a covered work with other separate and independent
-works, which are not by their nature extensions of the covered work,
-and which are not combined with it such as to form a larger program,
-in or on a volume of a storage or distribution medium, is called an
-"aggregate" if the compilation and its resulting copyright are not
-used to limit the access or legal rights of the compilation's users
-beyond what the individual works permit. Inclusion of a covered work
-in an aggregate does not cause this License to apply to the other
-parts of the aggregate.
-
-### 6. Conveying Non-Source Forms.
-
-You may convey a covered work in object code form under the terms of
-sections 4 and 5, provided that you also convey the machine-readable
-Corresponding Source under the terms of this License, in one of these
-ways:
-
-- a) Convey the object code in, or embodied in, a physical product
- (including a physical distribution medium), accompanied by the
- Corresponding Source fixed on a durable physical medium
- customarily used for software interchange.
-- b) Convey the object code in, or embodied in, a physical product
- (including a physical distribution medium), accompanied by a
- written offer, valid for at least three years and valid for as
- long as you offer spare parts or customer support for that product
- model, to give anyone who possesses the object code either (1) a
- copy of the Corresponding Source for all the software in the
- product that is covered by this License, on a durable physical
- medium customarily used for software interchange, for a price no
- more than your reasonable cost of physically performing this
- conveying of source, or (2) access to copy the Corresponding
- Source from a network server at no charge.
-- c) Convey individual copies of the object code with a copy of the
- written offer to provide the Corresponding Source. This
- alternative is allowed only occasionally and noncommercially, and
- only if you received the object code with such an offer, in accord
- with subsection 6b.
-- d) Convey the object code by offering access from a designated
- place (gratis or for a charge), and offer equivalent access to the
- Corresponding Source in the same way through the same place at no
- further charge. You need not require recipients to copy the
- Corresponding Source along with the object code. If the place to
- copy the object code is a network server, the Corresponding Source
- may be on a different server (operated by you or a third party)
- that supports equivalent copying facilities, provided you maintain
- clear directions next to the object code saying where to find the
- Corresponding Source. Regardless of what server hosts the
- Corresponding Source, you remain obligated to ensure that it is
- available for as long as needed to satisfy these requirements.
-- e) Convey the object code using peer-to-peer transmission,
- provided you inform other peers where the object code and
- Corresponding Source of the work are being offered to the general
- public at no charge under subsection 6d.
-
-A separable portion of the object code, whose source code is excluded
-from the Corresponding Source as a System Library, need not be
-included in conveying the object code work.
-
-A "User Product" is either (1) a "consumer product", which means any
-tangible personal property which is normally used for personal,
-family, or household purposes, or (2) anything designed or sold for
-incorporation into a dwelling. In determining whether a product is a
-consumer product, doubtful cases shall be resolved in favor of
-coverage. For a particular product received by a particular user,
-"normally used" refers to a typical or common use of that class of
-product, regardless of the status of the particular user or of the way
-in which the particular user actually uses, or expects or is expected
-to use, the product. A product is a consumer product regardless of
-whether the product has substantial commercial, industrial or
-non-consumer uses, unless such uses represent the only significant
-mode of use of the product.
-
-"Installation Information" for a User Product means any methods,
-procedures, authorization keys, or other information required to
-install and execute modified versions of a covered work in that User
-Product from a modified version of its Corresponding Source. The
-information must suffice to ensure that the continued functioning of
-the modified object code is in no case prevented or interfered with
-solely because modification has been made.
-
-If you convey an object code work under this section in, or with, or
-specifically for use in, a User Product, and the conveying occurs as
-part of a transaction in which the right of possession and use of the
-User Product is transferred to the recipient in perpetuity or for a
-fixed term (regardless of how the transaction is characterized), the
-Corresponding Source conveyed under this section must be accompanied
-by the Installation Information. But this requirement does not apply
-if neither you nor any third party retains the ability to install
-modified object code on the User Product (for example, the work has
-been installed in ROM).
-
-The requirement to provide Installation Information does not include a
-requirement to continue to provide support service, warranty, or
-updates for a work that has been modified or installed by the
-recipient, or for the User Product in which it has been modified or
-installed. Access to a network may be denied when the modification
-itself materially and adversely affects the operation of the network
-or violates the rules and protocols for communication across the
-network.
-
-Corresponding Source conveyed, and Installation Information provided,
-in accord with this section must be in a format that is publicly
-documented (and with an implementation available to the public in
-source code form), and must require no special password or key for
-unpacking, reading or copying.
-
-### 7. Additional Terms.
-
-"Additional permissions" are terms that supplement the terms of this
-License by making exceptions from one or more of its conditions.
-Additional permissions that are applicable to the entire Program shall
-be treated as though they were included in this License, to the extent
-that they are valid under applicable law. If additional permissions
-apply only to part of the Program, that part may be used separately
-under those permissions, but the entire Program remains governed by
-this License without regard to the additional permissions.
-
-When you convey a copy of a covered work, you may at your option
-remove any additional permissions from that copy, or from any part of
-it. (Additional permissions may be written to require their own
-removal in certain cases when you modify the work.) You may place
-additional permissions on material, added by you to a covered work,
-for which you have or can give appropriate copyright permission.
-
-Notwithstanding any other provision of this License, for material you
-add to a covered work, you may (if authorized by the copyright holders
-of that material) supplement the terms of this License with terms:
-
-- a) Disclaiming warranty or limiting liability differently from the
- terms of sections 15 and 16 of this License; or
-- b) Requiring preservation of specified reasonable legal notices or
- author attributions in that material or in the Appropriate Legal
- Notices displayed by works containing it; or
-- c) Prohibiting misrepresentation of the origin of that material,
- or requiring that modified versions of such material be marked in
- reasonable ways as different from the original version; or
-- d) Limiting the use for publicity purposes of names of licensors
- or authors of the material; or
-- e) Declining to grant rights under trademark law for use of some
- trade names, trademarks, or service marks; or
-- f) Requiring indemnification of licensors and authors of that
- material by anyone who conveys the material (or modified versions
- of it) with contractual assumptions of liability to the recipient,
- for any liability that these contractual assumptions directly
- impose on those licensors and authors.
-
-All other non-permissive additional terms are considered "further
-restrictions" within the meaning of section 10. If the Program as you
-received it, or any part of it, contains a notice stating that it is
-governed by this License along with a term that is a further
-restriction, you may remove that term. If a license document contains
-a further restriction but permits relicensing or conveying under this
-License, you may add to a covered work material governed by the terms
-of that license document, provided that the further restriction does
-not survive such relicensing or conveying.
-
-If you add terms to a covered work in accord with this section, you
-must place, in the relevant source files, a statement of the
-additional terms that apply to those files, or a notice indicating
-where to find the applicable terms.
-
-Additional terms, permissive or non-permissive, may be stated in the
-form of a separately written license, or stated as exceptions; the
-above requirements apply either way.
-
-### 8. Termination.
-
-You may not propagate or modify a covered work except as expressly
-provided under this License. Any attempt otherwise to propagate or
-modify it is void, and will automatically terminate your rights under
-this License (including any patent licenses granted under the third
-paragraph of section 11).
-
-However, if you cease all violation of this License, then your license
-from a particular copyright holder is reinstated (a) provisionally,
-unless and until the copyright holder explicitly and finally
-terminates your license, and (b) permanently, if the copyright holder
-fails to notify you of the violation by some reasonable means prior to
-60 days after the cessation.
-
-Moreover, your license from a particular copyright holder is
-reinstated permanently if the copyright holder notifies you of the
-violation by some reasonable means, this is the first time you have
-received notice of violation of this License (for any work) from that
-copyright holder, and you cure the violation prior to 30 days after
-your receipt of the notice.
-
-Termination of your rights under this section does not terminate the
-licenses of parties who have received copies or rights from you under
-this License. If your rights have been terminated and not permanently
-reinstated, you do not qualify to receive new licenses for the same
-material under section 10.
-
-### 9. Acceptance Not Required for Having Copies.
-
-You are not required to accept this License in order to receive or run
-a copy of the Program. Ancillary propagation of a covered work
-occurring solely as a consequence of using peer-to-peer transmission
-to receive a copy likewise does not require acceptance. However,
-nothing other than this License grants you permission to propagate or
-modify any covered work. These actions infringe copyright if you do
-not accept this License. Therefore, by modifying or propagating a
-covered work, you indicate your acceptance of this License to do so.
-
-### 10. Automatic Licensing of Downstream Recipients.
-
-Each time you convey a covered work, the recipient automatically
-receives a license from the original licensors, to run, modify and
-propagate that work, subject to this License. You are not responsible
-for enforcing compliance by third parties with this License.
-
-An "entity transaction" is a transaction transferring control of an
-organization, or substantially all assets of one, or subdividing an
-organization, or merging organizations. If propagation of a covered
-work results from an entity transaction, each party to that
-transaction who receives a copy of the work also receives whatever
-licenses to the work the party's predecessor in interest had or could
-give under the previous paragraph, plus a right to possession of the
-Corresponding Source of the work from the predecessor in interest, if
-the predecessor has it or can get it with reasonable efforts.
-
-You may not impose any further restrictions on the exercise of the
-rights granted or affirmed under this License. For example, you may
-not impose a license fee, royalty, or other charge for exercise of
-rights granted under this License, and you may not initiate litigation
-(including a cross-claim or counterclaim in a lawsuit) alleging that
-any patent claim is infringed by making, using, selling, offering for
-sale, or importing the Program or any portion of it.
-
-### 11. Patents.
-
-A "contributor" is a copyright holder who authorizes use under this
-License of the Program or a work on which the Program is based. The
-work thus licensed is called the contributor's "contributor version".
-
-A contributor's "essential patent claims" are all patent claims owned
-or controlled by the contributor, whether already acquired or
-hereafter acquired, that would be infringed by some manner, permitted
-by this License, of making, using, or selling its contributor version,
-but do not include claims that would be infringed only as a
-consequence of further modification of the contributor version. For
-purposes of this definition, "control" includes the right to grant
-patent sublicenses in a manner consistent with the requirements of
-this License.
-
-Each contributor grants you a non-exclusive, worldwide, royalty-free
-patent license under the contributor's essential patent claims, to
-make, use, sell, offer for sale, import and otherwise run, modify and
-propagate the contents of its contributor version.
-
-In the following three paragraphs, a "patent license" is any express
-agreement or commitment, however denominated, not to enforce a patent
-(such as an express permission to practice a patent or covenant not to
-sue for patent infringement). To "grant" such a patent license to a
-party means to make such an agreement or commitment not to enforce a
-patent against the party.
-
-If you convey a covered work, knowingly relying on a patent license,
-and the Corresponding Source of the work is not available for anyone
-to copy, free of charge and under the terms of this License, through a
-publicly available network server or other readily accessible means,
-then you must either (1) cause the Corresponding Source to be so
-available, or (2) arrange to deprive yourself of the benefit of the
-patent license for this particular work, or (3) arrange, in a manner
-consistent with the requirements of this License, to extend the patent
-license to downstream recipients. "Knowingly relying" means you have
-actual knowledge that, but for the patent license, your conveying the
-covered work in a country, or your recipient's use of the covered work
-in a country, would infringe one or more identifiable patents in that
-country that you have reason to believe are valid.
-
-If, pursuant to or in connection with a single transaction or
-arrangement, you convey, or propagate by procuring conveyance of, a
-covered work, and grant a patent license to some of the parties
-receiving the covered work authorizing them to use, propagate, modify
-or convey a specific copy of the covered work, then the patent license
-you grant is automatically extended to all recipients of the covered
-work and works based on it.
-
-A patent license is "discriminatory" if it does not include within the
-scope of its coverage, prohibits the exercise of, or is conditioned on
-the non-exercise of one or more of the rights that are specifically
-granted under this License. You may not convey a covered work if you
-are a party to an arrangement with a third party that is in the
-business of distributing software, under which you make payment to the
-third party based on the extent of your activity of conveying the
-work, and under which the third party grants, to any of the parties
-who would receive the covered work from you, a discriminatory patent
-license (a) in connection with copies of the covered work conveyed by
-you (or copies made from those copies), or (b) primarily for and in
-connection with specific products or compilations that contain the
-covered work, unless you entered into that arrangement, or that patent
-license was granted, prior to 28 March 2007.
-
-Nothing in this License shall be construed as excluding or limiting
-any implied license or other defenses to infringement that may
-otherwise be available to you under applicable patent law.
-
-### 12. No Surrender of Others' Freedom.
-
-If conditions are imposed on you (whether by court order, agreement or
-otherwise) that contradict the conditions of this License, they do not
-excuse you from the conditions of this License. If you cannot convey a
-covered work so as to satisfy simultaneously your obligations under
-this License and any other pertinent obligations, then as a
-consequence you may not convey it at all. For example, if you agree to
-terms that obligate you to collect a royalty for further conveying
-from those to whom you convey the Program, the only way you could
-satisfy both those terms and this License would be to refrain entirely
-from conveying the Program.
-
-### 13. Remote Network Interaction; Use with the GNU General Public License.
-
-Notwithstanding any other provision of this License, if you modify the
-Program, your modified version must prominently offer all users
-interacting with it remotely through a computer network (if your
-version supports such interaction) an opportunity to receive the
-Corresponding Source of your version by providing access to the
-Corresponding Source from a network server at no charge, through some
-standard or customary means of facilitating copying of software. This
-Corresponding Source shall include the Corresponding Source for any
-work covered by version 3 of the GNU General Public License that is
-incorporated pursuant to the following paragraph.
-
-Notwithstanding any other provision of this License, you have
-permission to link or combine any covered work with a work licensed
-under version 3 of the GNU General Public License into a single
-combined work, and to convey the resulting work. The terms of this
-License will continue to apply to the part which is the covered work,
-but the work with which it is combined will remain governed by version
-3 of the GNU General Public License.
-
-### 14. Revised Versions of this License.
-
-The Free Software Foundation may publish revised and/or new versions
-of the GNU Affero General Public License from time to time. Such new
-versions will be similar in spirit to the present version, but may
-differ in detail to address new problems or concerns.
-
-Each version is given a distinguishing version number. If the Program
-specifies that a certain numbered version of the GNU Affero General
-Public License "or any later version" applies to it, you have the
-option of following the terms and conditions either of that numbered
-version or of any later version published by the Free Software
-Foundation. If the Program does not specify a version number of the
-GNU Affero General Public License, you may choose any version ever
-published by the Free Software Foundation.
-
-If the Program specifies that a proxy can decide which future versions
-of the GNU Affero General Public License can be used, that proxy's
-public statement of acceptance of a version permanently authorizes you
-to choose that version for the Program.
-
-Later license versions may give you additional or different
-permissions. However, no additional obligations are imposed on any
-author or copyright holder as a result of your choosing to follow a
-later version.
-
-### 15. Disclaimer of Warranty.
-
-THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
-APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
-HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT
-WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND
-PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE
-DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR
-CORRECTION.
-
-### 16. Limitation of Liability.
-
-IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
-WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR
-CONVEYS THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
-INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES
-ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT
-NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR
-LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM
-TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER
-PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
-
-### 17. Interpretation of Sections 15 and 16.
-
-If the disclaimer of warranty and limitation of liability provided
-above cannot be given local legal effect according to their terms,
-reviewing courts shall apply local law that most closely approximates
-an absolute waiver of all civil liability in connection with the
-Program, unless a warranty or assumption of liability accompanies a
-copy of the Program in return for a fee.
-
-END OF TERMS AND CONDITIONS
-
-## How to Apply These Terms to Your New Programs
-
-If you develop a new program, and you want it to be of the greatest
-possible use to the public, the best way to achieve this is to make it
-free software which everyone can redistribute and change under these
-terms.
-
-To do so, attach the following notices to the program. It is safest to
-attach them to the start of each source file to most effectively state
-the exclusion of warranty; and each file should have at least the
-"copyright" line and a pointer to where the full notice is found.
-
- <one line to give the program's name and a brief idea of what it does.>
- Copyright (C) <year> <name of author>
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as
- published by the Free Software Foundation, either version 3 of the
- License, or (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see <https://www.gnu.org/licenses/>.
-
-Also add information on how to contact you by electronic and paper
-mail.
-
-If your software can interact with users remotely through a computer
-network, you should also make sure that it provides a way for users to
-get its source. For example, if your program is a web application, its
-interface could display a "Source" link that leads users to an archive
-of the code. There are many ways you could offer source, and different
-solutions will be better for different programs; see section 13 for
-the specific requirements.
-
-You should also get your employer (if you work as a programmer) or
-school, if any, to sign a "copyright disclaimer" for the program, if
-necessary. For more information on this, and how to apply and follow
-the GNU AGPL, see <https://www.gnu.org/licenses/>.
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
diff --git a/Makefile b/Makefile
index 1fae97f6..4088caf4 100644
--- a/Makefile
+++ b/Makefile
@@ -60,6 +60,22 @@ container: update-package-versions
${DOCKER} build -f containers/Containerfile.ocr \
-t ${CONTAINER_BASE}/trustgraph-ocr:${VERSION} .
+some-containers:
+ ${DOCKER} build -f containers/Containerfile.base \
+ -t ${CONTAINER_BASE}/trustgraph-base:${VERSION} .
+ ${DOCKER} build -f containers/Containerfile.flow \
+ -t ${CONTAINER_BASE}/trustgraph-flow:${VERSION} .
+# ${DOCKER} build -f containers/Containerfile.vertexai \
+# -t ${CONTAINER_BASE}/trustgraph-vertexai:${VERSION} .
+# ${DOCKER} build -f containers/Containerfile.bedrock \
+# -t ${CONTAINER_BASE}/trustgraph-bedrock:${VERSION} .
+
+basic-containers: update-package-versions
+ ${DOCKER} build -f containers/Containerfile.base \
+ -t ${CONTAINER_BASE}/trustgraph-base:${VERSION} .
+ ${DOCKER} build -f containers/Containerfile.flow \
+ -t ${CONTAINER_BASE}/trustgraph-flow:${VERSION} .
+
container.ocr:
${DOCKER} build -f containers/Containerfile.ocr \
-t ${CONTAINER_BASE}/trustgraph-ocr:${VERSION} .
diff --git a/README.md b/README.md
index 46c8a077..54e19573 100644
--- a/README.md
+++ b/README.md
@@ -282,7 +282,22 @@ The default Grafana dashboard tracks the following:
[Developing for TrustGraph](docs/README.development.md)
## 📄 License
-**TrustGraph** is licensed under [AGPL-3.0](https://www.gnu.org/licenses/agpl-3.0.en.html).
+
+**TrustGraph** is licensed under [Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0).
+
+ Copyright 2024-2025 TrustGraph
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
## 📞 Support & Community
- Bug Reports & Feature Requests: [Discord](https://discord.gg/sQMwkRz5GX)
diff --git a/templates/README.md b/templates/README.md
deleted file mode 100644
index 23039e9a..00000000
--- a/templates/README.md
+++ /dev/null
@@ -1,125 +0,0 @@
-
-# TrustGraph template generation
-
-There are two utilities here:
-
-- `generate`: Generates a single Docker Compose launch configuration
- based on configuration you provide.
-- `generate-all`: Generates the release bundle for releases. You won't
- need to use this unless you are managing releases.
-
-## `generate-all`
-
-Previously, this generates a full set of all vector DB / triple store / LLM
-combinations, and put them in a single ZIP file. But this got out of
-hand, so at the time of writing, this generates a single configuraton
-using Qdrant vector DB, Ollama LLM support and Cassandra for a triple store.
-
-The combinations are contained withing the code, it takes two arguments:
-- output ZIP file (is over-written)
-- TrustGraph version number
-
-```
-templates/generate-all output.zip 0.18.11
-```
-
-## `generate`
-
-This utility takes a configuration file describing the components to bundle,
-and outputs a Docker Compose YAML file.
-
-### Input configuration
-
-The input configuration is a JSON file, an array of components to pull into
-the configuration. For each component, there is a name and a (possibly empty)
-object describing addtional parameters for that component.
-
-Example:
-
-```
-[
- {
- "name": "cassandra",
- "parameters": {}
- },
- {
- "name": "pulsar",
- "parameters": {}
- },
- {
- "name": "qdrant",
- "parameters": {}
- },
- {
- "name": "embeddings-hf",
- "parameters": {}
- },
- {
- "name": "graph-rag",
- "parameters": {}
- },
- {
- "name": "grafana",
- "parameters": {}
- },
- {
- "name": "trustgraph",
- "parameters": {}
- },
- {
- "name": "googleaistudio",
- "parameters": {
- "googleaistudio-temperature": 0.3,
- "googleaistudio-max-output-tokens": 2048,
- "googleaistudio-model": "gemini-1.5-pro-002"
- }
- },
- {
- "name": "prompt-template",
- "parameters": {}
- },
- {
- "name": "override-recursive-chunker",
- "parameters": {
- "chunk-size": 1000,
- "chunk-overlap": 50
- }
- },
- {
- "name": "workbench-ui",
- "parameters": {}
- },
- {
- "name": "agent-manager-react",
- "parameters": {}
- }
-]
-```
-
-If you want to make your own configuration you could try changing the
-configuration above:
-- Components which are essential: pulsar, trustgraph, graph-rag, grafana,
- agent-manager-react
-- You need a triple store, one of: cassandra, memgraph, falkordb, neo4j
-- You need a vector store, one of: qdrant, pinecone
-- You need an LLM, one of: azure, azure-openai, bedrock, claude, cohere,
- llamafile, ollama, openai, vertexai.
-- You need an embeddings implementation, one of: embeddings-hf,
- embeddings-ollama
-- Optionally add the Workbench tool: workbench-ui
-
-Components have over-ridable parameters, look in the component definition
-in `templates/components/` to see what you can override.
-
-### Invocation
-
-Two parameters:
-- The output ZIP file
-- The version number
-
-The configuration file described above is provided on standard input
-
-```
-templates/generate out.zip 0.18.9 < config.json
-```
-
diff --git a/templates/all-patterns.jsonnet b/templates/all-patterns.jsonnet
deleted file mode 100644
index 3282be53..00000000
--- a/templates/all-patterns.jsonnet
+++ /dev/null
@@ -1,31 +0,0 @@
-[
- import "patterns/document-rag.jsonnet",
- import "patterns/embeddings-hf.jsonnet",
- import "patterns/embeddings-ollama.jsonnet",
- import "patterns/grafana.jsonnet",
- import "patterns/triple-store-cassandra.jsonnet",
- import "patterns/triple-store-neo4j.jsonnet",
- import "patterns/triple-store-falkordb.jsonnet",
- import "patterns/graph-rag.jsonnet",
- import "patterns/llm-azure.jsonnet",
- import "patterns/llm-azure-openai.jsonnet",
- import "patterns/llm-bedrock.jsonnet",
- import "patterns/llm-claude.jsonnet",
- import "patterns/llm-cohere.jsonnet",
- import "patterns/llm-llamafile.jsonnet",
- import "patterns/llm-mistral.jsonnet",
- import "patterns/llm-ollama.jsonnet",
- import "patterns/llm-openai.jsonnet",
- import "patterns/llm-vertexai.jsonnet",
- import "patterns/override-recursive-chunker.jsonnet",
- import "patterns/prompt-template-definitions.jsonnet",
- import "patterns/prompt-template-document-query.jsonnet",
- import "patterns/prompt-template-kg-query.jsonnet",
- import "patterns/prompt-template-relationships.jsonnet",
- import "patterns/prompt-template-rows-template.jsonnet",
- import "patterns/pulsar-manager.jsonnet",
- import "patterns/pulsar.jsonnet",
- import "patterns/trustgraph-base.jsonnet",
- import "patterns/vector-store-milvus.jsonnet",
- import "patterns/vector-store-qdrant.jsonnet",
-]
diff --git a/templates/base/base.jsonnet b/templates/base/base.jsonnet
deleted file mode 100644
index 9f82efb2..00000000
--- a/templates/base/base.jsonnet
+++ /dev/null
@@ -1,3 +0,0 @@
-{
- restart: "on-failure:100",
-}
\ No newline at end of file
diff --git a/templates/components.jsonnet b/templates/components.jsonnet
deleted file mode 100644
index e94374ac..00000000
--- a/templates/components.jsonnet
+++ /dev/null
@@ -1,82 +0,0 @@
-{
-
- // Essentials
- "trustgraph-base": import "components/trustgraph.jsonnet",
- "pulsar": import "components/pulsar.jsonnet",
-
- // LLMs
- "azure": import "components/azure.jsonnet",
- "azure-openai": import "components/azure-openai.jsonnet",
- "bedrock": import "components/bedrock.jsonnet",
- "claude": import "components/claude.jsonnet",
- "cohere": import "components/cohere.jsonnet",
- "googleaistudio": import "components/googleaistudio.jsonnet",
- "lmstudio": import "components/lmstudio.jsonnet",
- "mistral": import "components/mistral.jsonnet",
- "ollama": import "components/ollama.jsonnet",
- "openai": import "components/openai.jsonnet",
- "vertexai": import "components/vertexai.jsonnet",
-
- // LLMs for RAG
- "azure-rag": import "components/azure-rag.jsonnet",
- "azure-openai-rag": import "components/azure-openai-rag.jsonnet",
- "bedrock-rag": import "components/bedrock-rag.jsonnet",
- "claude-rag": import "components/claude-rag.jsonnet",
- "cohere-rag": import "components/cohere-rag.jsonnet",
- "googleaistudio-rag": import "components/googleaistudio-rag.jsonnet",
- "lmstudio-rag": import "components/lmstudio-rag.jsonnet",
- "mistral-rag": import "components/mistral-rag.jsonnet",
- "ollama-rag": import "components/ollama-rag.jsonnet",
- "openai-rag": import "components/openai-rag.jsonnet",
- "vertexai-rag": import "components/vertexai-rag.jsonnet",
-
- // Embeddings
- "embeddings-ollama": import "components/embeddings-ollama.jsonnet",
- "embeddings-hf": import "components/embeddings-hf.jsonnet",
- "embeddings-fastembed": import "components/embeddings-fastembed.jsonnet",
-
- // Processing pipelines
- "graph-rag": import "components/graph-rag.jsonnet",
- "document-rag": import "components/document-rag.jsonnet",
-
- // OCR options
- "ocr": import "components/ocr.jsonnet",
- "mistral-ocr": import "components/mistral-ocr.jsonnet",
-
- // Librarian - document management
- "librarian": import "components/librarian.jsonnet",
-
- // Vector stores
- "vector-store-milvus": import "components/milvus.jsonnet",
- "vector-store-qdrant": import "components/qdrant.jsonnet",
- "vector-store-pinecone": import "components/pinecone.jsonnet",
-
- // Triples stores
- "triple-store-cassandra": import "components/cassandra.jsonnet",
- "triple-store-neo4j": import "components/neo4j.jsonnet",
- "triple-store-falkordb": import "components/falkordb.jsonnet",
- "triple-store-memgraph": import "components/memgraph.jsonnet",
-
- // Observability support
- "grafana": import "components/grafana.jsonnet",
-
- // Pulsar manager is a UI for Pulsar. Uses a LOT of memory
- "pulsar-manager": import "components/pulsar-manager.jsonnet",
-
- "llamafile": import "components/llamafile.jsonnet",
- "override-recursive-chunker": import "components/chunker-recursive.jsonnet",
-
- // The prompt manager
- "prompt-template": import "components/prompt-template.jsonnet",
- "prompt-overrides": import "components/prompt-overrides.jsonnet",
-
- // ReAct agent
- "agent-manager-react": import "components/agent-manager-react.jsonnet",
-
- // Optional UI
- "workbench-ui": import "components/workbench-ui.jsonnet",
-
- // Does nothing. But, can be a hack to overwrite parameters
- "null": {},
-
-}
diff --git a/templates/components/agent-manager-react.jsonnet b/templates/components/agent-manager-react.jsonnet
deleted file mode 100644
index 672a0439..00000000
--- a/templates/components/agent-manager-react.jsonnet
+++ /dev/null
@@ -1,64 +0,0 @@
-local base = import "base/base.jsonnet";
-local images = import "values/images.jsonnet";
-local url = import "values/url.jsonnet";
-local prompts = import "prompts/mixtral.jsonnet";
-local default_prompts = import "prompts/default-prompts.jsonnet";
-
-{
-
- tools:: [],
-
- "agent-manager" +: {
-
- create:: function(engine)
-
- local container =
- engine.container("agent-manager")
- .with_image(images.trustgraph_flow)
- .with_command([
- "agent-manager-react",
- "-p",
- url.pulsar,
- "--prompt-request-queue",
- "non-persistent://tg/request/prompt-rag",
- "--prompt-response-queue",
- "non-persistent://tg/response/prompt-rag",
- "--tool-type",
- ] + [
- tool.id + "=" + tool.type
- for tool in $.tools
- ] + [
- "--tool-description"
- ] + [
- tool.id + "=" + tool.description
- for tool in $.tools
- ] + [
- "--tool-argument"
- ] + [
- "%s=%s:%s:%s" % [
- tool.id, arg.name, arg.type, arg.description
- ]
- for tool in $.tools
- for arg in tool.arguments
- ]
- )
- .with_limits("0.5", "128M")
- .with_reservations("0.1", "128M");
-
- local containerSet = engine.containers(
- "agent-manager", [ container ]
- );
-
- local service =
- engine.internalService(containerSet)
- .with_port(8000, 8000, "metrics");
-
- engine.resources([
- containerSet,
- service,
- ])
-
- },
-
-} + default_prompts
-
diff --git a/templates/components/azure-openai-rag.jsonnet b/templates/components/azure-openai-rag.jsonnet
deleted file mode 100644
index 33355707..00000000
--- a/templates/components/azure-openai-rag.jsonnet
+++ /dev/null
@@ -1,61 +0,0 @@
-local base = import "base/base.jsonnet";
-local images = import "values/images.jsonnet";
-local url = import "values/url.jsonnet";
-local prompts = import "prompts/mixtral.jsonnet";
-
-{
-
- with:: function(key, value)
- self + {
- ["ollama-rag-" + key]:: value,
- },
-
- "azure-openai-rag-model":: "GPT-3.5-Turbo",
- "azure-openai-rag-max-output-tokens":: 4192,
- "azure-openai-rag-temperature":: 0.0,
-
- "text-completion-rag" +: {
-
- create:: function(engine)
-
- local envSecrets = engine.envSecrets("azure-openai-credentials")
- .with_env_var("AZURE_TOKEN", "azure-token");
-
- local containerRag =
- engine.container("text-completion-rag")
- .with_image(images.trustgraph_flow)
- .with_command([
- "text-completion-azure",
- "-p",
- url.pulsar,
- "-x",
- std.toString($["azure-openai-rag-max-output-tokens"]),
- "-t",
- "%0.3f" % $["azure-openai-rag-temperature"],
- "-i",
- "non-persistent://tg/request/text-completion-rag",
- "-o",
- "non-persistent://tg/response/text-completion-rag",
- ])
- .with_env_var_secrets(envSecrets)
- .with_limits("0.5", "128M")
- .with_reservations("0.1", "128M");
-
- local containerSetRag = engine.containers(
- "text-completion-rag", [ containerRag ]
- );
-
- local serviceRag =
- engine.internalService(containerSetRag)
- .with_port(8000, 8000, "metrics");
-
- engine.resources([
- envSecrets,
- containerSetRag,
- serviceRag,
- ])
-
- },
-
-} + prompts
-
diff --git a/templates/components/azure-openai.jsonnet b/templates/components/azure-openai.jsonnet
deleted file mode 100644
index 3ecbbdac..00000000
--- a/templates/components/azure-openai.jsonnet
+++ /dev/null
@@ -1,59 +0,0 @@
-local base = import "base/base.jsonnet";
-local images = import "values/images.jsonnet";
-local url = import "values/url.jsonnet";
-local prompts = import "prompts/mixtral.jsonnet";
-
-{
-
- with:: function(key, value)
- self + {
- ["azure-openai-" + key]:: value,
- },
-
- "azure-openai-model":: "GPT-3.5-Turbo",
- "azure-openai-max-output-tokens":: 4192,
- "azure-openai-temperature":: 0.0,
-
- "text-completion" +: {
-
- create:: function(engine)
-
- local envSecrets = engine.envSecrets("azure-openai-credentials")
- .with_env_var("AZURE_TOKEN", "azure-token");
-
- local container =
- engine.container("text-completion")
- .with_image(images.trustgraph_flow)
- .with_command([
- "text-completion-azure-openai",
- "-p",
- url.pulsar,
- "-m",
- $["azure-openai-model"],
- "-x",
- std.toString($["azure-openai-max-output-tokens"]),
- "-t",
- "%0.3f" % $["azure-openai-temperature"],
- ])
- .with_env_var_secrets(envSecrets)
- .with_limits("0.5", "128M")
- .with_reservations("0.1", "128M");
-
- local containerSet = engine.containers(
- "text-completion", [ container ]
- );
-
- local service =
- engine.internalService(containerSet)
- .with_port(8000, 8000, "metrics");
-
- engine.resources([
- envSecrets,
- containerSet,
- service,
- ])
-
- },
-
-} + prompts
-
diff --git a/templates/components/azure-rag.jsonnet b/templates/components/azure-rag.jsonnet
deleted file mode 100644
index 20b7306e..00000000
--- a/templates/components/azure-rag.jsonnet
+++ /dev/null
@@ -1,60 +0,0 @@
-local images = import "values/images.jsonnet";
-local url = import "values/url.jsonnet";
-local prompts = import "prompts/mixtral.jsonnet";
-
-{
-
- with:: function(key, value)
- self + {
- ["azure-rag-" + key]:: value,
- },
-
- "azure-rag-max-output-tokens":: 4096,
- "azure-rag-temperature":: 0.0,
-
- "text-completion-rag" +: {
-
- create:: function(engine)
-
- local envSecrets = engine.envSecrets("azure-credentials")
- .with_env_var("AZURE_TOKEN", "azure-token")
- .with_env_var("AZURE_ENDPOINT", "azure-endpoint");
-
- local containerRag =
- engine.container("text-completion-rag")
- .with_image(images.trustgraph_flow)
- .with_command([
- "text-completion-azure",
- "-p",
- url.pulsar,
- "-x",
- std.toString($["azure-rag-max-output-tokens"]),
- "-t",
- "%0.3f" % $["azure-rag-temperature"],
- "-i",
- "non-persistent://tg/request/text-completion-rag",
- "-o",
- "non-persistent://tg/response/text-completion-rag",
- ])
- .with_env_var_secrets(envSecrets)
- .with_limits("0.5", "128M")
- .with_reservations("0.1", "128M");
-
- local containerSetRag = engine.containers(
- "text-completion-rag", [ containerRag ]
- );
-
- local serviceRag =
- engine.internalService(containerSetRag)
- .with_port(8000, 8000, "metrics");
-
- engine.resources([
- envSecrets,
- containerSetRag,
- serviceRag,
- ])
-
- }
-
-} + prompts
-
diff --git a/templates/components/azure.jsonnet b/templates/components/azure.jsonnet
deleted file mode 100644
index c7746e23..00000000
--- a/templates/components/azure.jsonnet
+++ /dev/null
@@ -1,56 +0,0 @@
-local images = import "values/images.jsonnet";
-local url = import "values/url.jsonnet";
-local prompts = import "prompts/mixtral.jsonnet";
-
-{
-
- with:: function(key, value)
- self + {
- ["azure-" + key]:: value,
- },
-
- "azure-max-output-tokens":: 4096,
- "azure-temperature":: 0.0,
-
- "text-completion" +: {
-
- create:: function(engine)
-
- local envSecrets = engine.envSecrets("azure-credentials")
- .with_env_var("AZURE_TOKEN", "azure-token")
- .with_env_var("AZURE_ENDPOINT", "azure-endpoint");
-
- local container =
- engine.container("text-completion")
- .with_image(images.trustgraph_flow)
- .with_command([
- "text-completion-azure",
- "-p",
- url.pulsar,
- "-x",
- std.toString($["azure-max-output-tokens"]),
- "-t",
- "%0.3f" % $["azure-temperature"],
- ])
- .with_env_var_secrets(envSecrets)
- .with_limits("0.5", "128M")
- .with_reservations("0.1", "128M");
-
- local containerSet = engine.containers(
- "text-completion", [ container ]
- );
-
- local service =
- engine.internalService(containerSet)
- .with_port(8000, 8000, "metrics");
-
- engine.resources([
- envSecrets,
- containerSet,
- service,
- ])
-
- }
-
-} + prompts
-
diff --git a/templates/components/bedrock-rag.jsonnet b/templates/components/bedrock-rag.jsonnet
deleted file mode 100644
index b265a9f2..00000000
--- a/templates/components/bedrock-rag.jsonnet
+++ /dev/null
@@ -1,66 +0,0 @@
-local base = import "base/base.jsonnet";
-local images = import "values/images.jsonnet";
-local url = import "values/url.jsonnet";
-local prompts = import "prompts/mixtral.jsonnet";
-local chunker = import "chunker-recursive.jsonnet";
-
-{
-
- with:: function(key, value)
- self + {
- ["bedrock-rag-" + key]:: value,
- },
-
- "bedrock-rag-max-output-tokens":: 4096,
- "bedrock-rag-temperature":: 0.0,
- "bedrock-rag-model":: "mistral.mixtral-8x7b-instruct-v0:1",
-
- "text-completion-rag" +: {
-
- create:: function(engine)
-
- local envSecrets = engine.envSecrets("bedrock-credentials")
- .with_env_var("AWS_ACCESS_KEY_ID", "aws-id-key")
- .with_env_var("AWS_SECRET_ACCESS_KEY", "aws-secret")
- .with_env_var("AWS_DEFAULT_REGION", "aws-region");
-
- local containerRag =
- engine.container("text-completion-rag")
- .with_image(images.trustgraph_bedrock)
- .with_command([
- "text-completion-bedrock",
- "-p",
- url.pulsar,
- "-x",
- std.toString($["bedrock-rag-max-output-tokens"]),
- "-t",
- "%0.3f" % $["bedrock-rag-temperature"],
- "-m",
- $["bedrock-rag-model"],
- "-i",
- "non-persistent://tg/request/text-completion-rag",
- "-o",
- "non-persistent://tg/response/text-completion-rag",
- ])
- .with_env_var_secrets(envSecrets)
- .with_limits("0.5", "128M")
- .with_reservations("0.1", "128M");
-
- local containerSetRag = engine.containers(
- "text-completion-rag", [ containerRag ]
- );
-
- local serviceRag =
- engine.internalService(containerSetRag)
- .with_port(8000, 8000, "metrics");
-
- engine.resources([
- envSecrets,
- containerSetRag,
- serviceRag,
- ])
-
- },
-
-} + prompts + chunker
-
diff --git a/templates/components/bedrock.jsonnet b/templates/components/bedrock.jsonnet
deleted file mode 100644
index 6b599057..00000000
--- a/templates/components/bedrock.jsonnet
+++ /dev/null
@@ -1,62 +0,0 @@
-local base = import "base/base.jsonnet";
-local images = import "values/images.jsonnet";
-local url = import "values/url.jsonnet";
-local prompts = import "prompts/mixtral.jsonnet";
-local chunker = import "chunker-recursive.jsonnet";
-
-{
-
- with:: function(key, value)
- self + {
- ["bedrock-" + key]:: value,
- },
-
- "bedrock-max-output-tokens":: 4096,
- "bedrock-temperature":: 0.0,
- "bedrock-model":: "mistral.mixtral-8x7b-instruct-v0:1",
-
- "text-completion" +: {
-
- create:: function(engine)
-
- local envSecrets = engine.envSecrets("bedrock-credentials")
- .with_env_var("AWS_ACCESS_KEY_ID", "aws-id-key")
- .with_env_var("AWS_SECRET_ACCESS_KEY", "aws-secret")
- .with_env_var("AWS_DEFAULT_REGION", "aws-region");
-
- local container =
- engine.container("text-completion")
- .with_image(images.trustgraph_bedrock)
- .with_command([
- "text-completion-bedrock",
- "-p",
- url.pulsar,
- "-x",
- std.toString($["bedrock-max-output-tokens"]),
- "-t",
- "%0.3f" % $["bedrock-temperature"],
- "-m",
- $["bedrock-model"],
- ])
- .with_env_var_secrets(envSecrets)
- .with_limits("0.5", "128M")
- .with_reservations("0.1", "128M");
-
- local containerSet = engine.containers(
- "text-completion", [ container ]
- );
-
- local service =
- engine.internalService(containerSet)
- .with_port(8000, 8000, "metrics");
-
- engine.resources([
- envSecrets,
- containerSet,
- service,
- ])
-
- },
-
-} + prompts + chunker
-
diff --git a/templates/components/cassandra.jsonnet b/templates/components/cassandra.jsonnet
deleted file mode 100644
index 92ecf69f..00000000
--- a/templates/components/cassandra.jsonnet
+++ /dev/null
@@ -1,74 +0,0 @@
-local base = import "base/base.jsonnet";
-local images = import "values/images.jsonnet";
-local url = import "values/url.jsonnet";
-local cassandra_hosts = "cassandra";
-local cassandra = import "stores/cassandra.jsonnet";
-
-cassandra + {
-
- "store-triples" +: {
-
- create:: function(engine)
-
- local container =
- engine.container("store-triples")
- .with_image(images.trustgraph_flow)
- .with_command([
- "triples-write-cassandra",
- "-p",
- url.pulsar,
- "-g",
- cassandra_hosts,
- ])
- .with_limits("0.5", "128M")
- .with_reservations("0.1", "128M");
-
- local containerSet = engine.containers(
- "store-triples", [ container ]
- );
-
- local service =
- engine.internalService(containerSet)
- .with_port(8080, 8080, "metrics");
-
- engine.resources([
- containerSet,
- service,
- ])
-
- },
-
- "query-triples" +: {
-
- create:: function(engine)
-
- local container =
- engine.container("query-triples")
- .with_image(images.trustgraph_flow)
- .with_command([
- "triples-query-cassandra",
- "-p",
- url.pulsar,
- "-g",
- cassandra_hosts,
- ])
- .with_limits("0.5", "512M")
- .with_reservations("0.1", "512M");
-
- local containerSet = engine.containers(
- "query-triples", [ container ]
- );
-
- local service =
- engine.internalService(containerSet)
- .with_port(8080, 8080, "metrics");
-
- engine.resources([
- containerSet,
- service,
- ])
-
- }
-
-}
-
diff --git a/templates/components/chunker-recursive.jsonnet b/templates/components/chunker-recursive.jsonnet
deleted file mode 100644
index 4a174366..00000000
--- a/templates/components/chunker-recursive.jsonnet
+++ /dev/null
@@ -1,46 +0,0 @@
-local base = import "base/base.jsonnet";
-local images = import "values/images.jsonnet";
-local url = import "values/url.jsonnet";
-local prompts = import "prompts/mixtral.jsonnet";
-
-{
-
- "chunk-size":: 2000,
- "chunk-overlap":: 100,
-
- "chunker" +: {
-
- create:: function(engine)
-
- local container =
- engine.container("chunker")
- .with_image(images.trustgraph_flow)
- .with_command([
- "chunker-recursive",
- "-p",
- url.pulsar,
- "--chunk-size",
- std.toString($["chunk-size"]),
- "--chunk-overlap",
- std.toString($["chunk-overlap"]),
- ])
- .with_limits("0.5", "128M")
- .with_reservations("0.1", "128M");
-
- local containerSet = engine.containers(
- "chunker", [ container ]
- );
-
- local service =
- engine.internalService(containerSet)
- .with_port(8000, 8000, "metrics");
-
- engine.resources([
- containerSet,
- service,
- ])
-
- },
-
-}
-
diff --git a/templates/components/claude-rag.jsonnet b/templates/components/claude-rag.jsonnet
deleted file mode 100644
index 06d58db2..00000000
--- a/templates/components/claude-rag.jsonnet
+++ /dev/null
@@ -1,63 +0,0 @@
-local base = import "base/base.jsonnet";
-local images = import "values/images.jsonnet";
-local url = import "values/url.jsonnet";
-local prompts = import "prompts/mixtral.jsonnet";
-
-{
-
- with:: function(key, value)
- self + {
- ["claude-rag-" + key]:: value,
- },
-
- "claude-rag-model":: "claude-3-sonnet-20240229",
- "claude-rag-max-output-tokens":: 4096,
- "claude-rag-temperature":: 0.0,
-
- "text-completion-rag" +: {
-
- create:: function(engine)
-
- local envSecrets = engine.envSecrets("claude-credentials")
- .with_env_var("CLAUDE_KEY", "claude-key");
-
- local containerRag =
- engine.container("text-completion-rag")
- .with_image(images.trustgraph_flow)
- .with_command([
- "text-completion-claude",
- "-p",
- url.pulsar,
- "-x",
- std.toString($["claude-rag-max-output-tokens"]),
- "-m",
- $["claude-rag-model"],
- "-t",
- "%0.3f" % $["claude-rag-temperature"],
- "-i",
- "non-persistent://tg/request/text-completion-rag",
- "-o",
- "non-persistent://tg/response/text-completion-rag",
- ])
- .with_env_var_secrets(envSecrets)
- .with_limits("0.5", "128M")
- .with_reservations("0.1", "128M");
-
- local containerSetRag = engine.containers(
- "text-completion-rag", [ containerRag ]
- );
-
- local serviceRag =
- engine.internalService(containerSetRag)
- .with_port(8000, 8000, "metrics");
-
- engine.resources([
- envSecrets,
- containerSetRag,
- serviceRag,
- ])
-
- },
-
-} + prompts
-
diff --git a/templates/components/claude.jsonnet b/templates/components/claude.jsonnet
deleted file mode 100644
index e43e7504..00000000
--- a/templates/components/claude.jsonnet
+++ /dev/null
@@ -1,59 +0,0 @@
-local base = import "base/base.jsonnet";
-local images = import "values/images.jsonnet";
-local url = import "values/url.jsonnet";
-local prompts = import "prompts/mixtral.jsonnet";
-
-{
-
- with:: function(key, value)
- self + {
- ["claude-" + key]:: value,
- },
-
- "claude-model":: "claude-3-sonnet-20240229",
- "claude-max-output-tokens":: 4096,
- "claude-temperature":: 0.0,
-
- "text-completion" +: {
-
- create:: function(engine)
-
- local envSecrets = engine.envSecrets("claude-credentials")
- .with_env_var("CLAUDE_KEY", "claude-key");
-
- local container =
- engine.container("text-completion")
- .with_image(images.trustgraph_flow)
- .with_command([
- "text-completion-claude",
- "-p",
- url.pulsar,
- "-x",
- std.toString($["claude-max-output-tokens"]),
- "-m",
- $["claude-model"],
- "-t",
- "%0.3f" % $["claude-temperature"],
- ])
- .with_env_var_secrets(envSecrets)
- .with_limits("0.5", "128M")
- .with_reservations("0.1", "128M");
-
- local containerSet = engine.containers(
- "text-completion", [ container ]
- );
-
- local service =
- engine.internalService(containerSet)
- .with_port(8000, 8000, "metrics");
-
- engine.resources([
- envSecrets,
- containerSet,
- service,
- ])
-
- },
-
-} + prompts
-
diff --git a/templates/components/cohere-rag.jsonnet b/templates/components/cohere-rag.jsonnet
deleted file mode 100644
index 6a142519..00000000
--- a/templates/components/cohere-rag.jsonnet
+++ /dev/null
@@ -1,56 +0,0 @@
-local base = import "base/base.jsonnet";
-local images = import "values/images.jsonnet";
-local url = import "values/url.jsonnet";
-local prompts = import "prompts/mixtral.jsonnet";
-
-{
-
- with:: function(key, value)
- self + {
- ["cohere-rag-" + key]:: value,
- },
-
- "cohere-rag-temperature":: 0.0,
-
- "text-completion-rag" +: {
-
- create:: function(engine)
-
- local envSecrets = engine.envSecrets("cohere-credentials")
- .with_env_var("COHERE_KEY", "cohere-key");
-
- local containerRag =
- engine.container("text-completion-rag")
- .with_image(images.trustgraph_flow)
- .with_command([
- "text-completion-cohere",
- "-p",
- url.pulsar,
- "-t",
- "%0.3f" % $["cohere-rag-temperature"],
- "-i",
- "non-persistent://tg/request/text-completion-rag",
- "-o",
- "non-persistent://tg/response/text-completion-rag",
- ])
- .with_limits("0.5", "128M")
- .with_reservations("0.1", "128M");
-
- local containerSetRag = engine.containers(
- "text-completion-rag", [ containerRag ]
- );
-
- local serviceRag =
- engine.internalService(containerSetRag)
- .with_port(8000, 8000, "metrics");
-
- engine.resources([
- envSecrets,
- containerSetRag,
- serviceRag,
- ])
-
- },
-
-} + prompts
-
diff --git a/templates/components/cohere.jsonnet b/templates/components/cohere.jsonnet
deleted file mode 100644
index 093436fd..00000000
--- a/templates/components/cohere.jsonnet
+++ /dev/null
@@ -1,52 +0,0 @@
-local base = import "base/base.jsonnet";
-local images = import "values/images.jsonnet";
-local url = import "values/url.jsonnet";
-local prompts = import "prompts/mixtral.jsonnet";
-
-{
-
- with:: function(key, value)
- self + {
- ["cohere-" + key]:: value,
- },
-
- "cohere-temperature":: 0.0,
-
- "text-completion" +: {
-
- create:: function(engine)
-
- local envSecrets = engine.envSecrets("cohere-credentials")
- .with_env_var("COHERE_KEY", "cohere-key");
-
- local container =
- engine.container("text-completion")
- .with_image(images.trustgraph_flow)
- .with_command([
- "text-completion-cohere",
- "-p",
- url.pulsar,
- "-t",
- "%0.3f" % $["cohere-temperature"],
- ])
- .with_limits("0.5", "128M")
- .with_reservations("0.1", "128M");
-
- local containerSet = engine.containers(
- "text-completion", [ container ]
- );
-
- local service =
- engine.internalService(containerSet)
- .with_port(8000, 8000, "metrics");
-
- engine.resources([
- envSecrets,
- containerSet,
- service,
- ])
-
- },
-
-} + prompts
-
diff --git a/templates/components/document-rag.jsonnet b/templates/components/document-rag.jsonnet
deleted file mode 100644
index 2d9dda3d..00000000
--- a/templates/components/document-rag.jsonnet
+++ /dev/null
@@ -1,77 +0,0 @@
-local base = import "base/base.jsonnet";
-local images = import "values/images.jsonnet";
-local url = import "values/url.jsonnet";
-local prompts = import "prompts/mixtral.jsonnet";
-
-{
-
- "document-rag-doc-limit":: 20,
-
- "document-rag" +: {
-
- create:: function(engine)
-
- local container =
- engine.container("document-rag")
- .with_image(images.trustgraph_flow)
- .with_command([
- "document-rag",
- "-p",
- url.pulsar,
- "--doc-limit",
- std.toString($["document-rag-doc-limit"]),
- "--prompt-request-queue",
- "non-persistent://tg/request/prompt-rag",
- "--prompt-response-queue",
- "non-persistent://tg/response/prompt-rag",
- ])
- .with_limits("0.5", "128M")
- .with_reservations("0.1", "128M");
-
- local containerSet = engine.containers(
- "document-rag", [ container ]
- );
-
- local service =
- engine.internalService(containerSet)
- .with_port(8000, 8000, "metrics");
-
- engine.resources([
- containerSet,
- service,
- ])
-
- },
-
- "document-embeddings" +: {
-
- create:: function(engine)
-
- local container =
- engine.container("document-embeddings")
- .with_image(images.trustgraph_flow)
- .with_command([
- "document-embeddings",
- "-p",
- url.pulsar,
- ])
- .with_limits("1.0", "512M")
- .with_reservations("0.5", "512M");
-
- local containerSet = engine.containers(
- "document-embeddings", [ container ]
- );
-
- local service =
- engine.internalService(containerSet)
- .with_port(8000, 8000, "metrics");
-
- engine.resources([
- containerSet,
- service,
- ])
-
- },
-
-}
-
diff --git a/templates/components/embeddings-fastembed.jsonnet b/templates/components/embeddings-fastembed.jsonnet
deleted file mode 100644
index c1fe35ff..00000000
--- a/templates/components/embeddings-fastembed.jsonnet
+++ /dev/null
@@ -1,43 +0,0 @@
-local base = import "base/base.jsonnet";
-local images = import "values/images.jsonnet";
-local url = import "values/url.jsonnet";
-local prompts = import "prompts/mixtral.jsonnet";
-
-{
-
- "embeddings-model":: "sentence-transformers/all-MiniLM-L6-v2",
-
- embeddings +: {
-
- create:: function(engine)
-
- local container =
- engine.container("embeddings")
- .with_image(images.trustgraph_flow)
- .with_command([
- "embeddings-fastembed",
- "-p",
- url.pulsar,
- "-m",
- $["embeddings-model"],
- ])
- .with_limits("1.0", "400M")
- .with_reservations("0.5", "400M");
-
- local containerSet = engine.containers(
- "embeddings", [ container ]
- );
-
- local service =
- engine.internalService(containerSet)
- .with_port(8000, 8000, "metrics");
-
- engine.resources([
- containerSet,
- service,
- ])
-
- },
-
-}
-
diff --git a/templates/components/embeddings-hf.jsonnet b/templates/components/embeddings-hf.jsonnet
deleted file mode 100644
index 29ebbc48..00000000
--- a/templates/components/embeddings-hf.jsonnet
+++ /dev/null
@@ -1,43 +0,0 @@
-local base = import "base/base.jsonnet";
-local images = import "values/images.jsonnet";
-local url = import "values/url.jsonnet";
-local prompts = import "prompts/mixtral.jsonnet";
-
-{
-
- "embeddings-model":: "all-MiniLM-L6-v2",
-
- embeddings +: {
-
- create:: function(engine)
-
- local container =
- engine.container("embeddings")
- .with_image(images.trustgraph_hf)
- .with_command([
- "embeddings-hf",
- "-p",
- url.pulsar,
- "-m",
- $["embeddings-model"],
- ])
- .with_limits("1.0", "400M")
- .with_reservations("0.5", "400M");
-
- local containerSet = engine.containers(
- "embeddings", [ container ]
- );
-
- local service =
- engine.internalService(containerSet)
- .with_port(8000, 8000, "metrics");
-
- engine.resources([
- containerSet,
- service,
- ])
-
- },
-
-}
-
diff --git a/templates/components/embeddings-ollama.jsonnet b/templates/components/embeddings-ollama.jsonnet
deleted file mode 100644
index a26ad0ba..00000000
--- a/templates/components/embeddings-ollama.jsonnet
+++ /dev/null
@@ -1,45 +0,0 @@
-local base = import "base/base.jsonnet";
-local images = import "values/images.jsonnet";
-local url = import "values/url.jsonnet";
-
-{
-
- "embeddings-model":: "mxbai-embed-large",
- "ollama-url":: "${OLLAMA_HOST}",
-
- embeddings +: {
-
- create:: function(engine)
-
- local container =
- engine.container("embeddings")
- .with_image(images.trustgraph_flow)
- .with_command([
- "embeddings-ollama",
- "-p",
- url.pulsar,
- "-m",
- $["embeddings-model"],
- "-r",
- $["ollama-url"],
- ])
- .with_limits("0.5", "128M")
- .with_reservations("0.1", "128M");
-
- local containerSet = engine.containers(
- "embeddings", [ container ]
- );
-
- local service =
- engine.internalService(containerSet)
- .with_port(8000, 8000, "metrics");
-
- engine.resources([
- containerSet,
- service,
- ])
-
- },
-
-}
-
diff --git a/templates/components/falkordb.jsonnet b/templates/components/falkordb.jsonnet
deleted file mode 100644
index c08896d3..00000000
--- a/templates/components/falkordb.jsonnet
+++ /dev/null
@@ -1,76 +0,0 @@
-local base = import "base/base.jsonnet";
-local images = import "values/images.jsonnet";
-local url = import "values/url.jsonnet";
-local falkordb = import "stores/falkordb.jsonnet";
-
-falkordb + {
-
- "falkordb-url":: "falkor://falkordb:6379",
-
- "store-triples" +: {
-
- create:: function(engine)
-
- local container =
- engine.container("store-triples")
- .with_image(images.trustgraph_flow)
- .with_command([
- "triples-write-falkordb",
- "-p",
- url.pulsar,
- "-g",
- $["falkordb-url"],
- ])
- .with_limits("0.5", "128M")
- .with_reservations("0.1", "128M");
-
- local containerSet = engine.containers(
- "store-triples", [ container ]
- );
-
- local service =
- engine.internalService(containerSet)
- .with_port(8080, 8080, "metrics");
-
- engine.resources([
- containerSet,
- service,
- ])
-
- },
-
- "query-triples" +: {
-
- create:: function(engine)
-
- local container =
- engine.container("query-triples")
- .with_image(images.trustgraph_flow)
- .with_command([
- "triples-query-falkordb",
- "-p",
- url.pulsar,
- "-g",
- $["falkordb-url"],
- ])
- .with_limits("0.5", "128M")
- .with_reservations("0.1", "128M");
-
- local containerSet = engine.containers(
- "query-triples", [ container ]
- );
-
- local service =
- engine.internalService(containerSet)
- .with_port(8080, 8080, "metrics");
-
- engine.resources([
- containerSet,
- service,
- ])
-
-
- }
-
-}
-
diff --git a/templates/components/googleaistudio-rag.jsonnet b/templates/components/googleaistudio-rag.jsonnet
deleted file mode 100644
index 332749e8..00000000
--- a/templates/components/googleaistudio-rag.jsonnet
+++ /dev/null
@@ -1,65 +0,0 @@
-local base = import "base/base.jsonnet";
-local images = import "values/images.jsonnet";
-local url = import "values/url.jsonnet";
-local prompts = import "prompts/mixtral.jsonnet";
-
-{
-
- with:: function(key, value)
- self + {
- ["googleaistudio-rag-" + key]:: value,
- },
-
- "googleaistudio-rag-max-output-tokens":: 4096,
- "googleaistudio-rag-temperature":: 0.0,
- "googleaistudio-rag-model":: "gemini-1.5-flash-002",
-
- "text-completion-rag" +: {
-
- create:: function(engine)
-
- local envSecrets = engine.envSecrets("googleaistudio-credentials")
- .with_env_var("GOOGLE_AI_STUDIO_KEY", "googleaistudio-key");
-
- local containerRag =
- engine.container("text-completion-rag")
- .with_image(images.trustgraph_flow)
- .with_command([
- "text-completion-googleaistudio",
- "-p",
- url.pulsar,
- "-x",
- std.toString(
- $["googleaistudio-rag-max-output-tokens"]
- ),
- "-t",
- "%0.3f" % $["googleaistudio-rag-temperature"],
- "-m",
- $["googleaistudio-rag-model"],
- "-i",
- "non-persistent://tg/request/text-completion-rag",
- "-o",
- "non-persistent://tg/response/text-completion-rag",
- ])
- .with_env_var_secrets(envSecrets)
- .with_limits("0.5", "128M")
- .with_reservations("0.1", "128M");
-
- local containerSetRag = engine.containers(
- "text-completion-rag", [ containerRag ]
- );
-
- local serviceRag =
- engine.internalService(containerSetRag)
- .with_port(8000, 8000, "metrics");
-
- engine.resources([
- envSecrets,
- containerSetRag,
- serviceRag,
- ])
-
- },
-
-} + prompts
-
diff --git a/templates/components/googleaistudio.jsonnet b/templates/components/googleaistudio.jsonnet
deleted file mode 100644
index 58c7807d..00000000
--- a/templates/components/googleaistudio.jsonnet
+++ /dev/null
@@ -1,59 +0,0 @@
-local base = import "base/base.jsonnet";
-local images = import "values/images.jsonnet";
-local url = import "values/url.jsonnet";
-local prompts = import "prompts/mixtral.jsonnet";
-
-{
-
- with:: function(key, value)
- self + {
- ["googleaistudio-" + key]:: value,
- },
-
- "googleaistudio-max-output-tokens":: 4096,
- "googleaistudio-temperature":: 0.0,
- "googleaistudio-model":: "gemini-1.5-flash-002",
-
- "text-completion" +: {
-
- create:: function(engine)
-
- local envSecrets = engine.envSecrets("googleaistudio-credentials")
- .with_env_var("GOOGLE_AI_STUDIO_KEY", "googleaistudio-key");
-
- local container =
- engine.container("text-completion")
- .with_image(images.trustgraph_flow)
- .with_command([
- "text-completion-googleaistudio",
- "-p",
- url.pulsar,
- "-x",
- std.toString($["googleaistudio-max-output-tokens"]),
- "-t",
- "%0.3f" % $["googleaistudio-temperature"],
- "-m",
- $["googleaistudio-model"],
- ])
- .with_env_var_secrets(envSecrets)
- .with_limits("0.5", "128M")
- .with_reservations("0.1", "128M");
-
- local containerSet = engine.containers(
- "text-completion", [ container ]
- );
-
- local service =
- engine.internalService(containerSet)
- .with_port(8000, 8000, "metrics");
-
- engine.resources([
- envSecrets,
- containerSet,
- service,
- ])
-
- },
-
-} + prompts
-
diff --git a/templates/components/grafana.jsonnet b/templates/components/grafana.jsonnet
deleted file mode 100644
index e968faec..00000000
--- a/templates/components/grafana.jsonnet
+++ /dev/null
@@ -1,122 +0,0 @@
-local base = import "base/base.jsonnet";
-local images = import "values/images.jsonnet";
-
-{
-
- "prometheus" +: {
-
- create:: function(engine)
-
- local vol = engine.volume("prometheus-data").with_size("20G");
-
- local cfgVol = engine.configVolume(
- "prometheus-cfg", "prometheus",
- {
- "prometheus.yml": importstr "prometheus/prometheus.yml",
- }
- );
-
- local container =
- engine.container("prometheus")
- .with_image(images.prometheus)
- .with_limits("0.5", "128M")
- .with_reservations("0.1", "128M")
- .with_port(9090, 9090, "http")
- .with_volume_mount(cfgVol, "/etc/prometheus/")
- .with_volume_mount(vol, "/prometheus");
-
- local containerSet = engine.containers(
- "prometheus", [ container ]
- );
-
- local service =
- engine.service(containerSet)
- .with_port(9090, 9090, "http");
-
- engine.resources([
- cfgVol,
- vol,
- containerSet,
- service,
- ])
-
- },
-
- "grafana" +: {
-
- create:: function(engine)
-
- local vol = engine.volume("grafana-storage").with_size("20G");
-
- local provDashVol = engine.configVolume(
- "prov-dash", "grafana/provisioning/",
- {
- "dashboard.yml":
- importstr "grafana/provisioning/dashboard.yml",
- }
-
- );
-
- local provDataVol = engine.configVolume(
- "prov-data", "grafana/provisioning/",
- {
- "datasource.yml":
- importstr "grafana/provisioning/datasource.yml",
- }
-
- );
-
- local dashVol = engine.configVolume(
- "dashboards", "grafana/dashboards/",
- {
- "dashboard.json":
- importstr "grafana/dashboards/dashboard.json",
- }
-
- );
-
- local container =
- engine.container("grafana")
- .with_image(images.grafana)
- .with_environment({
- // GF_AUTH_ANONYMOUS_ORG_ROLE: "Admin",
- // GF_AUTH_ANONYMOUS_ENABLED: "true",
- // GF_ORG_ROLE: "Admin",
- GF_ORG_NAME: "trustgraph.ai",
- // GF_SERVER_ROOT_URL: "https://example.com",
- })
- .with_limits("1.0", "256M")
- .with_reservations("0.5", "256M")
- .with_port(3000, 3000, "cassandra")
- .with_volume_mount(vol, "/var/lib/grafana")
- .with_volume_mount(
- provDashVol, "/etc/grafana/provisioning/dashboards/"
- )
- .with_volume_mount(
- provDataVol, "/etc/grafana/provisioning/datasources/"
- )
- .with_volume_mount(
- dashVol, "/var/lib/grafana/dashboards/"
- );
-
- local containerSet = engine.containers(
- "grafana", [ container ]
- );
-
- local service =
- engine.service(containerSet)
- .with_port(3000, 3000, "http");
-
- engine.resources([
- vol,
- provDashVol,
- provDataVol,
- dashVol,
- containerSet,
- service,
- ])
-
- },
-
-}
-
diff --git a/templates/components/graph-rag.jsonnet b/templates/components/graph-rag.jsonnet
deleted file mode 100644
index 8d3e2e38..00000000
--- a/templates/components/graph-rag.jsonnet
+++ /dev/null
@@ -1,175 +0,0 @@
-local base = import "base/base.jsonnet";
-local images = import "values/images.jsonnet";
-local url = import "values/url.jsonnet";
-
-{
-
- "graph-rag-entity-limit":: 50,
- "graph-rag-triple-limit":: 30,
- "graph-rag-max-subgraph-size":: 400,
- "graph-rag-max-path-length":: 2,
-
- "kg-extract-definitions" +: {
-
- create:: function(engine)
-
- local container =
- engine.container("kg-extract-definitions")
- .with_image(images.trustgraph_flow)
- .with_command([
- "kg-extract-definitions",
- "-p",
- url.pulsar,
- ])
- .with_limits("0.5", "128M")
- .with_reservations("0.1", "128M");
-
- local containerSet = engine.containers(
- "kg-extract-definitions", [ container ]
- );
-
- local service =
- engine.internalService(containerSet)
- .with_port(8000, 8000, "metrics");
-
- engine.resources([
- containerSet,
- service,
- ])
-
- },
-
- "kg-extract-relationships" +: {
-
- create:: function(engine)
-
- local container =
- engine.container("kg-extract-relationships")
- .with_image(images.trustgraph_flow)
- .with_command([
- "kg-extract-relationships",
- "-p",
- url.pulsar,
- ])
- .with_limits("0.5", "128M")
- .with_reservations("0.1", "128M");
-
- local containerSet = engine.containers(
- "kg-extract-relationships", [ container ]
- );
-
- local service =
- engine.internalService(containerSet)
- .with_port(8000, 8000, "metrics");
-
- engine.resources([
- containerSet,
- service,
- ])
-
- },
-
- "kg-extract-topics" +: {
-
- create:: function(engine)
-
- local container =
- engine.container("kg-extract-topics")
- .with_image(images.trustgraph_flow)
- .with_command([
- "kg-extract-topics",
- "-p",
- url.pulsar,
- ])
- .with_limits("0.5", "128M")
- .with_reservations("0.1", "128M");
-
- local containerSet = engine.containers(
- "kg-extract-topics", [ container ]
- );
-
- local service =
- engine.internalService(containerSet)
- .with_port(8000, 8000, "metrics");
-
- engine.resources([
- containerSet,
- service,
- ])
-
- },
-
- "graph-rag" +: {
-
- create:: function(engine)
-
- local container =
- engine.container("graph-rag")
- .with_image(images.trustgraph_flow)
- .with_command([
- "graph-rag",
- "-p",
- url.pulsar,
- "--prompt-request-queue",
- "non-persistent://tg/request/prompt-rag",
- "--prompt-response-queue",
- "non-persistent://tg/response/prompt-rag",
- "--entity-limit",
- std.toString($["graph-rag-entity-limit"]),
- "--triple-limit",
- std.toString($["graph-rag-triple-limit"]),
- "--max-subgraph-size",
- std.toString($["graph-rag-max-subgraph-size"]),
- "--max-path-length",
- std.toString($["graph-rag-max-path-length"]),
- ])
- .with_limits("0.5", "128M")
- .with_reservations("0.1", "128M");
-
- local containerSet = engine.containers(
- "graph-rag", [ container ]
- );
-
- local service =
- engine.internalService(containerSet)
- .with_port(8000, 8000, "metrics");
-
- engine.resources([
- containerSet,
- service,
- ])
-
- },
-
- "graph-embeddings" +: {
-
- create:: function(engine)
-
- local container =
- engine.container("graph-embeddings")
- .with_image(images.trustgraph_flow)
- .with_command([
- "graph-embeddings",
- "-p",
- url.pulsar,
- ])
- .with_limits("1.0", "512M")
- .with_reservations("0.5", "512M");
-
- local containerSet = engine.containers(
- "graph-embeddings", [ container ]
- );
-
- local service =
- engine.internalService(containerSet)
- .with_port(8000, 8000, "metrics");
-
- engine.resources([
- containerSet,
- service,
- ])
-
- },
-
-}
-
diff --git a/templates/components/librarian.jsonnet b/templates/components/librarian.jsonnet
deleted file mode 100644
index 4df1b692..00000000
--- a/templates/components/librarian.jsonnet
+++ /dev/null
@@ -1,43 +0,0 @@
-local base = import "base/base.jsonnet";
-local images = import "values/images.jsonnet";
-local url = import "values/url.jsonnet";
-local minio = import "stores/minio.jsonnet";
-local cassandra = import "stores/cassandra.jsonnet";
-
-{
-
- "librarian" +: {
-
- create:: function(engine)
-
- local container =
- engine.container("librarian")
- .with_image(images.trustgraph_flow)
- .with_command([
- "librarian",
- "-p",
- url.pulsar,
- ])
- .with_limits("0.5", "256M")
- .with_reservations("0.1", "256M");
-
- local containerSet = engine.containers(
- "librarian", [ container ]
- );
-
- local service =
- engine.internalService(containerSet)
- .with_port(8000, 8000, "metrics");
-
- engine.resources([
- containerSet,
- service,
- ])
-
- },
-
-}
-
- // Minio and Cassandra are used by the Librarian
- + minio + cassandra
-
diff --git a/templates/components/llamafile-rag.jsonnet b/templates/components/llamafile-rag.jsonnet
deleted file mode 100644
index 262f586e..00000000
--- a/templates/components/llamafile-rag.jsonnet
+++ /dev/null
@@ -1,57 +0,0 @@
-local base = import "base/base.jsonnet";
-local images = import "values/images.jsonnet";
-local url = import "values/url.jsonnet";
-local prompts = import "prompts/slm.jsonnet";
-
-{
-
- with:: function(key, value)
- self + {
- ["llamafile-rag-" + key]:: value,
- },
-
- "llamafile-rag-model":: "LLaMA_CPP",
-
- "text-completion-rag" +: {
-
- create:: function(engine)
-
- local envSecrets = engine.envSecrets("llamafile-credentials")
- .with_env_var("LLAMAFILE_URL", "llamafile-url");
-
- local containerRag =
- engine.container("text-completion-rag")
- .with_image(images.trustgraph_flow)
- .with_command([
- "text-completion-llamafile",
- "-p",
- url.pulsar,
- "-m",
- $["llamafile-rag-model"],
- "-i",
- "non-persistent://tg/request/text-completion-rag",
- "-o",
- "non-persistent://tg/response/text-completion-rag",
- ])
- .with_env_var_secrets(envSecrets)
- .with_limits("0.5", "128M")
- .with_reservations("0.1", "128M");
-
- local containerSetRag = engine.containers(
- "text-completion-rag", [ containerRag ]
- );
-
- local serviceRag =
- engine.internalService(containerSetRag)
- .with_port(8080, 8080, "metrics");
-
- engine.resources([
- envSecrets,
- containerSetRag,
- serviceRag,
- ])
-
- },
-
-} + prompts
-
diff --git a/templates/components/llamafile.jsonnet b/templates/components/llamafile.jsonnet
deleted file mode 100644
index f3e1efd3..00000000
--- a/templates/components/llamafile.jsonnet
+++ /dev/null
@@ -1,53 +0,0 @@
-local base = import "base/base.jsonnet";
-local images = import "values/images.jsonnet";
-local url = import "values/url.jsonnet";
-local prompts = import "prompts/slm.jsonnet";
-
-{
-
- with:: function(key, value)
- self + {
- ["llamafile-" + key]:: value,
- },
-
- "llamafile-model":: "LLaMA_CPP",
-
- "text-completion" +: {
-
- create:: function(engine)
-
- local envSecrets = engine.envSecrets("llamafile-credentials")
- .with_env_var("LLAMAFILE_URL", "llamafile-url");
-
- local container =
- engine.container("text-completion")
- .with_image(images.trustgraph_flow)
- .with_command([
- "text-completion-llamafile",
- "-p",
- url.pulsar,
- "-m",
- $["llamafile-model"],
- ])
- .with_env_var_secrets(envSecrets)
- .with_limits("0.5", "128M")
- .with_reservations("0.1", "128M");
-
- local containerSet = engine.containers(
- "text-completion", [ container ]
- );
-
- local service =
- engine.internalService(containerSet)
- .with_port(8080, 8080, "metrics");
-
- engine.resources([
- envSecrets,
- containerSet,
- service,
- ])
-
- },
-
-} + prompts
-
diff --git a/templates/components/lmstudio-rag.jsonnet b/templates/components/lmstudio-rag.jsonnet
deleted file mode 100644
index 70a94087..00000000
--- a/templates/components/lmstudio-rag.jsonnet
+++ /dev/null
@@ -1,63 +0,0 @@
-local base = import "base/base.jsonnet";
-local images = import "values/images.jsonnet";
-local url = import "values/url.jsonnet";
-local prompts = import "prompts/mixtral.jsonnet";
-
-{
-
- with:: function(key, value)
- self + {
- ["lmstudio-rag-" + key]:: value,
- },
-
- "lmstudio-rag-max-output-tokens":: 4096,
- "lmstudio-rag-temperature":: 0.0,
- "lmstudio-rag-model":: "GPT-3.5-Turbo",
-
- "text-completion-rag" +: {
-
- create:: function(engine)
-
- local envSecrets = engine.envSecrets("lmstudio-credentials")
- .with_env_var("LMSTUDIO_URL", "lmstudio-url");
-
- local containerRag =
- engine.container("text-completion-rag")
- .with_image(images.trustgraph_flow)
- .with_command([
- "text-completion-lmstudio",
- "-p",
- url.pulsar,
- "-x",
- std.toString($["lmstudio-rag-max-output-tokens"]),
- "-t",
- "%0.3f" % $["lmstudio-rag-temperature"],
- "-m",
- $["lmstudio-rag-model"],
- "-i",
- "non-persistent://tg/request/text-completion-rag",
- "-o",
- "non-persistent://tg/response/text-completion-rag",
- ])
- .with_env_var_secrets(envSecrets)
- .with_limits("0.5", "128M")
- .with_reservations("0.1", "128M");
-
- local containerSetRag = engine.containers(
- "text-completion-rag", [ containerRag ]
- );
-
- local serviceRag =
- engine.internalService(containerSetRag)
- .with_port(8080, 8080, "metrics");
-
- engine.resources([
- envSecrets,
- containerSetRag,
- serviceRag,
- ])
-
- },
-
-} + prompts
-
diff --git a/templates/components/lmstudio.jsonnet b/templates/components/lmstudio.jsonnet
deleted file mode 100644
index 4fe1da58..00000000
--- a/templates/components/lmstudio.jsonnet
+++ /dev/null
@@ -1,59 +0,0 @@
-local base = import "base/base.jsonnet";
-local images = import "values/images.jsonnet";
-local url = import "values/url.jsonnet";
-local prompts = import "prompts/mixtral.jsonnet";
-
-{
-
- with:: function(key, value)
- self + {
- ["lmstudio-" + key]:: value,
- },
-
- "lmstudio-max-output-tokens":: 4096,
- "lmstudio-temperature":: 0.0,
- "lmstudio-model":: "GPT-3.5-Turbo",
-
- "text-completion" +: {
-
- create:: function(engine)
-
- local envSecrets = engine.envSecrets("lmstudio-credentials")
- .with_env_var("LMSTUDIO_URL", "lmstudio-url");
-
- local container =
- engine.container("text-completion")
- .with_image(images.trustgraph_flow)
- .with_command([
- "text-completion-lmstudio",
- "-p",
- url.pulsar,
- "-x",
- std.toString($["lmstudio-max-output-tokens"]),
- "-t",
- "%0.3f" % $["lmstudio-temperature"],
- "-m",
- $["lmstudio-model"],
- ])
- .with_env_var_secrets(envSecrets)
- .with_limits("0.5", "128M")
- .with_reservations("0.1", "128M");
-
- local containerSet = engine.containers(
- "text-completion", [ container ]
- );
-
- local service =
- engine.internalService(containerSet)
- .with_port(8080, 8080, "metrics");
-
- engine.resources([
- envSecrets,
- containerSet,
- service,
- ])
-
- },
-
-} + prompts
-
diff --git a/templates/components/memgraph.jsonnet b/templates/components/memgraph.jsonnet
deleted file mode 100644
index 21684a61..00000000
--- a/templates/components/memgraph.jsonnet
+++ /dev/null
@@ -1,81 +0,0 @@
-local base = import "base/base.jsonnet";
-local images = import "values/images.jsonnet";
-local url = import "values/url.jsonnet";
-local memgraph = import "stores/memgraph.jsonnet";
-
-memgraph + {
-
- "memgraph-url":: "bolt://memgraph:7687",
- "memgraph-database":: "memgraph",
-
- "store-triples" +: {
-
- create:: function(engine)
-
- local container =
- engine.container("store-triples")
- .with_image(images.trustgraph_flow)
- .with_command([
- "triples-write-memgraph",
- "-p",
- url.pulsar,
- "-g",
- $["memgraph-url"],
- "--database",
- $["memgraph-database"],
- ])
- .with_limits("0.5", "128M")
- .with_reservations("0.1", "128M");
-
- local containerSet = engine.containers(
- "store-triples", [ container ]
- );
-
- local service =
- engine.internalService(containerSet)
- .with_port(8080, 8080, "metrics");
-
- engine.resources([
- containerSet,
- service,
- ])
-
- },
-
- "query-triples" +: {
-
- create:: function(engine)
-
- local container =
- engine.container("query-triples")
- .with_image(images.trustgraph_flow)
- .with_command([
- "triples-query-memgraph",
- "-p",
- url.pulsar,
- "-g",
- $["memgraph-url"],
- "--database",
- $["memgraph-database"],
- ])
- .with_limits("0.5", "128M")
- .with_reservations("0.1", "128M");
-
- local containerSet = engine.containers(
- "query-triples", [ container ]
- );
-
- local service =
- engine.internalService(containerSet)
- .with_port(8080, 8080, "metrics");
-
- engine.resources([
- containerSet,
- service,
- ])
-
-
- }
-
-}
-
diff --git a/templates/components/milvus.jsonnet b/templates/components/milvus.jsonnet
deleted file mode 100644
index 27e5e316..00000000
--- a/templates/components/milvus.jsonnet
+++ /dev/null
@@ -1,139 +0,0 @@
-local base = import "base/base.jsonnet";
-local images = import "values/images.jsonnet";
-local url = import "values/url.jsonnet";
-local cassandra_hosts = "cassandra";
-local milvus = import "stores/milvus.jsonnet";
-
-milvus + {
-
- "store-graph-embeddings" +: {
-
- create:: function(engine)
-
- local container =
- engine.container("store-graph-embeddings")
- .with_image(images.trustgraph_flow)
- .with_command([
- "ge-write-milvus",
- "-p",
- url.pulsar,
- "-t",
- url.milvus,
- ])
- .with_limits("0.5", "128M")
- .with_reservations("0.1", "128M");
-
- local containerSet = engine.containers(
- "store-graph-embeddings", [ container ]
- );
-
- local service =
- engine.internalService(containerSet)
- .with_port(8000, 8000, "metrics");
-
- engine.resources([
- containerSet,
- service,
- ])
-
- },
-
- "query-graph-embeddings" +: {
-
- create:: function(engine)
-
- local container =
- engine.container("query-graph-embeddings")
- .with_image(images.trustgraph_flow)
- .with_command([
- "ge-query-milvus",
- "-p",
- url.pulsar,
- "-t",
- url.milvus,
- ])
- .with_limits("0.5", "128M")
- .with_reservations("0.1", "128M");
-
- local containerSet = engine.containers(
- "query-graph-embeddings", [ container ]
- );
-
- local service =
- engine.internalService(containerSet)
- .with_port(8000, 8000, "metrics");
-
- engine.resources([
- containerSet,
- service,
- ])
-
- },
-
- "store-doc-embeddings" +: {
-
- create:: function(engine)
-
- local container =
- engine.container("store-doc-embeddings")
- .with_image(images.trustgraph_flow)
- .with_command([
- "de-write-milvus",
- "-p",
- url.pulsar,
- "-t",
- url.milvus,
- ])
- .with_limits("0.5", "128M")
- .with_reservations("0.1", "128M");
-
- local containerSet = engine.containers(
- "store-doc-embeddings", [ container ]
- );
-
- local service =
- engine.internalService(containerSet)
- .with_port(8000, 8000, "metrics");
-
- engine.resources([
- containerSet,
- service,
- ])
-
- },
-
- "query-doc-embeddings" +: {
-
- create:: function(engine)
-
- local container =
- engine.container("query-doc-embeddings")
- .with_image(images.trustgraph_flow)
- .with_command([
- "de-query-milvus",
- "-p",
- url.pulsar,
- "-t",
- url.milvus,
- ])
- .with_limits("0.5", "128M")
- .with_reservations("0.1", "128M");
-
- local containerSet = engine.containers(
- "query-doc-embeddings", [ container ]
- );
-
- local service =
- engine.internalService(containerSet)
- .with_port(8000, 8000, "metrics");
-
- engine.resources([
- containerSet,
- service,
- ])
-
-
- }
-
-}
-
diff --git a/templates/components/mistral-ocr.jsonnet b/templates/components/mistral-ocr.jsonnet
deleted file mode 100644
index a70addd5..00000000
--- a/templates/components/mistral-ocr.jsonnet
+++ /dev/null
@@ -1,47 +0,0 @@
-local images = import "values/images.jsonnet";
-local url = import "values/url.jsonnet";
-
-{
-
- with:: function(key, value)
- self + {
- ["mistral-" + key]:: value,
- },
-
- "pdf-decoder" +: {
-
- create:: function(engine)
-
- local envSecrets = engine.envSecrets("mistral-credentials")
- .with_env_var("MISTRAL_TOKEN", "mistral-token");
-
- local container =
- engine.container("mistral-ocr")
- .with_image(images.trustgraph_flow)
- .with_command([
- "pdf-ocr-mistral",
- "-p",
- url.pulsar,
- ])
- .with_env_var_secrets(envSecrets)
- .with_limits("0.5", "128M")
- .with_reservations("0.1", "128M");
-
- local containerSet = engine.containers(
- "mistral-ocr", [ container ]
- );
-
- local service =
- engine.internalService(containerSet)
- .with_port(8080, 8080, "metrics");
-
- engine.resources([
- envSecrets,
- containerSet,
- service,
- ])
-
- },
-
-}
-
diff --git a/templates/components/mistral-rag.jsonnet b/templates/components/mistral-rag.jsonnet
deleted file mode 100644
index 12fbe8a5..00000000
--- a/templates/components/mistral-rag.jsonnet
+++ /dev/null
@@ -1,63 +0,0 @@
-local base = import "base/base.jsonnet";
-local images = import "values/images.jsonnet";
-local url = import "values/url.jsonnet";
-local prompts = import "prompts/mixtral.jsonnet";
-
-{
-
- with:: function(key, value)
- self + {
- ["mistral-rag-" + key]:: value,
- },
-
- "mistral-rag-max-output-tokens":: 4096,
- "mistral-rag-temperature":: 0.0,
- "mistral-rag-model":: "ministral-8b-latest",
-
- "text-completion-rag" +: {
-
- create:: function(engine)
-
- local envSecrets = engine.envSecrets("mistral-credentials")
- .with_env_var("MISTRAL_TOKEN", "mistral-token");
-
- local containerRag =
- engine.container("text-completion-rag")
- .with_image(images.trustgraph_flow)
- .with_command([
- "text-completion-mistral",
- "-p",
- url.pulsar,
- "-x",
- std.toString($["mistral-rag-max-output-tokens"]),
- "-t",
- "%0.3f" % $["mistral-rag-temperature"],
- "-m",
- $["mistral-rag-model"],
- "-i",
- "non-persistent://tg/request/text-completion-rag",
- "-o",
- "non-persistent://tg/response/text-completion-rag",
- ])
- .with_env_var_secrets(envSecrets)
- .with_limits("0.5", "128M")
- .with_reservations("0.1", "128M");
-
- local containerSetRag = engine.containers(
- "text-completion-rag", [ containerRag ]
- );
-
- local serviceRag =
- engine.internalService(containerSetRag)
- .with_port(8080, 8080, "metrics");
-
- engine.resources([
- envSecrets,
- containerSetRag,
- serviceRag,
- ])
-
- },
-
-} + prompts
-
diff --git a/templates/components/mistral.jsonnet b/templates/components/mistral.jsonnet
deleted file mode 100644
index 4de332c9..00000000
--- a/templates/components/mistral.jsonnet
+++ /dev/null
@@ -1,59 +0,0 @@
-local base = import "base/base.jsonnet";
-local images = import "values/images.jsonnet";
-local url = import "values/url.jsonnet";
-local prompts = import "prompts/mixtral.jsonnet";
-
-{
-
- with:: function(key, value)
- self + {
- ["mistral-" + key]:: value,
- },
-
- "mistral-max-output-tokens":: 4096,
- "mistral-temperature":: 0.0,
- "mistral-model":: "ministral-8b-latest",
-
- "text-completion" +: {
-
- create:: function(engine)
-
- local envSecrets = engine.envSecrets("mistral-credentials")
- .with_env_var("MISTRAL_TOKEN", "mistral-token");
-
- local container =
- engine.container("text-completion")
- .with_image(images.trustgraph_flow)
- .with_command([
- "text-completion-mistral",
- "-p",
- url.pulsar,
- "-x",
- std.toString($["mistral-max-output-tokens"]),
- "-t",
- "%0.3f" % $["mistral-temperature"],
- "-m",
- $["mistral-model"],
- ])
- .with_env_var_secrets(envSecrets)
- .with_limits("0.5", "128M")
- .with_reservations("0.1", "128M");
-
- local containerSet = engine.containers(
- "text-completion", [ container ]
- );
-
- local service =
- engine.internalService(containerSet)
- .with_port(8080, 8080, "metrics");
-
- engine.resources([
- envSecrets,
- containerSet,
- service,
- ])
-
- },
-
-} + prompts
-
diff --git a/templates/components/neo4j.jsonnet b/templates/components/neo4j.jsonnet
deleted file mode 100644
index 7cebdc71..00000000
--- a/templates/components/neo4j.jsonnet
+++ /dev/null
@@ -1,76 +0,0 @@
-local base = import "base/base.jsonnet";
-local images = import "values/images.jsonnet";
-local url = import "values/url.jsonnet";
-local neo4j = import "stores/neo4j.jsonnet";
-
-neo4j + {
-
- "neo4j-url":: "bolt://neo4j:7687",
-
- "store-triples" +: {
-
- create:: function(engine)
-
- local container =
- engine.container("store-triples")
- .with_image(images.trustgraph_flow)
- .with_command([
- "triples-write-neo4j",
- "-p",
- url.pulsar,
- "-g",
- $["neo4j-url"],
- ])
- .with_limits("0.5", "128M")
- .with_reservations("0.1", "128M");
-
- local containerSet = engine.containers(
- "store-triples", [ container ]
- );
-
- local service =
- engine.internalService(containerSet)
- .with_port(8080, 8080, "metrics");
-
- engine.resources([
- containerSet,
- service,
- ])
-
- },
-
- "query-triples" +: {
-
- create:: function(engine)
-
- local container =
- engine.container("query-triples")
- .with_image(images.trustgraph_flow)
- .with_command([
- "triples-query-neo4j",
- "-p",
- url.pulsar,
- "-g",
- $["neo4j-url"],
- ])
- .with_limits("0.5", "128M")
- .with_reservations("0.1", "128M");
-
- local containerSet = engine.containers(
- "query-triples", [ container ]
- );
-
- local service =
- engine.internalService(containerSet)
- .with_port(8080, 8080, "metrics");
-
- engine.resources([
- containerSet,
- service,
- ])
-
-
- }
-
-}
-
diff --git a/templates/components/null.jsonnet b/templates/components/null.jsonnet
deleted file mode 100644
index 2c63c085..00000000
--- a/templates/components/null.jsonnet
+++ /dev/null
@@ -1,2 +0,0 @@
-{
-}
diff --git a/templates/components/ocr.jsonnet b/templates/components/ocr.jsonnet
deleted file mode 100644
index cdd49583..00000000
--- a/templates/components/ocr.jsonnet
+++ /dev/null
@@ -1,37 +0,0 @@
-local images = import "values/images.jsonnet";
-local url = import "values/url.jsonnet";
-
-{
-
- "pdf-decoder" +: {
-
- create:: function(engine)
-
- local container =
- engine.container("pdf-ocr")
- .with_image(images.trustgraph_ocr)
- .with_command([
- "pdf-ocr",
- "-p",
- url.pulsar,
- ])
- .with_limits("1.0", "512M")
- .with_reservations("0.1", "512M");
-
- local containerSet = engine.containers(
- "pdf-ocr", [ container ]
- );
-
- local service =
- engine.internalService(containerSet)
- .with_port(8080, 8080, "metrics");
-
- engine.resources([
- containerSet,
- service,
- ])
-
- },
-
-}
-
diff --git a/templates/components/ollama-rag.jsonnet b/templates/components/ollama-rag.jsonnet
deleted file mode 100644
index 680adea5..00000000
--- a/templates/components/ollama-rag.jsonnet
+++ /dev/null
@@ -1,57 +0,0 @@
-local base = import "base/base.jsonnet";
-local images = import "values/images.jsonnet";
-local url = import "values/url.jsonnet";
-local prompts = import "prompts/mixtral.jsonnet";
-
-{
-
- with:: function(key, value)
- self + {
- ["ollama-rag-" + key]:: value,
- },
-
- "ollama-rag-model":: "gemma2:9b",
-
- "text-completion-rag" +: {
-
- create:: function(engine)
-
- local envSecrets = engine.envSecrets("ollama-credentials")
- .with_env_var("OLLAMA_HOST", "ollama-host");
-
- local containerRag =
- engine.container("text-completion-rag")
- .with_image(images.trustgraph_flow)
- .with_command([
- "text-completion-ollama",
- "-p",
- url.pulsar,
- "-m",
- $["ollama-rag-model"],
- "-i",
- "non-persistent://tg/request/text-completion-rag",
- "-o",
- "non-persistent://tg/response/text-completion-rag",
- ])
- .with_env_var_secrets(envSecrets)
- .with_limits("0.5", "128M")
- .with_reservations("0.1", "128M");
-
- local containerSetRag = engine.containers(
- "text-completion-rag", [ containerRag ]
- );
-
- local serviceRag =
- engine.internalService(containerSetRag)
- .with_port(8080, 8080, "metrics");
-
- engine.resources([
- envSecrets,
- containerSetRag,
- serviceRag,
- ])
-
- },
-
-} + prompts
-
diff --git a/templates/components/ollama.jsonnet b/templates/components/ollama.jsonnet
deleted file mode 100644
index 95f1abf0..00000000
--- a/templates/components/ollama.jsonnet
+++ /dev/null
@@ -1,53 +0,0 @@
-local base = import "base/base.jsonnet";
-local images = import "values/images.jsonnet";
-local url = import "values/url.jsonnet";
-local prompts = import "prompts/mixtral.jsonnet";
-
-{
-
- with:: function(key, value)
- self + {
- ["ollama-" + key]:: value,
- },
-
- "ollama-model":: "gemma2:9b",
-
- "text-completion" +: {
-
- create:: function(engine)
-
- local envSecrets = engine.envSecrets("ollama-credentials")
- .with_env_var("OLLAMA_HOST", "ollama-host");
-
- local container =
- engine.container("text-completion")
- .with_image(images.trustgraph_flow)
- .with_command([
- "text-completion-ollama",
- "-p",
- url.pulsar,
- "-m",
- $["ollama-model"],
- ])
- .with_env_var_secrets(envSecrets)
- .with_limits("0.5", "128M")
- .with_reservations("0.1", "128M");
-
- local containerSet = engine.containers(
- "text-completion", [ container ]
- );
-
- local service =
- engine.internalService(containerSet)
- .with_port(8080, 8080, "metrics");
-
- engine.resources([
- envSecrets,
- containerSet,
- service,
- ])
-
- },
-
-} + prompts
-
diff --git a/templates/components/openai-rag.jsonnet b/templates/components/openai-rag.jsonnet
deleted file mode 100644
index bfb7dd98..00000000
--- a/templates/components/openai-rag.jsonnet
+++ /dev/null
@@ -1,63 +0,0 @@
-local base = import "base/base.jsonnet";
-local images = import "values/images.jsonnet";
-local url = import "values/url.jsonnet";
-local prompts = import "prompts/mixtral.jsonnet";
-
-{
-
- with:: function(key, value)
- self + {
- ["openai-rag-" + key]:: value,
- },
-
- "openai-rag-max-output-tokens":: 4096,
- "openai-rag-temperature":: 0.0,
- "openai-rag-model":: "GPT-3.5-Turbo",
-
- "text-completion-rag" +: {
-
- create:: function(engine)
-
- local envSecrets = engine.envSecrets("openai-credentials")
- .with_env_var("OPENAI_TOKEN", "openai-token");
-
- local containerRag =
- engine.container("text-completion-rag")
- .with_image(images.trustgraph_flow)
- .with_command([
- "text-completion-openai",
- "-p",
- url.pulsar,
- "-x",
- std.toString($["openai-rag-max-output-tokens"]),
- "-t",
- "%0.3f" % $["openai-rag-temperature"],
- "-m",
- $["openai-rag-model"],
- "-i",
- "non-persistent://tg/request/text-completion-rag",
- "-o",
- "non-persistent://tg/response/text-completion-rag",
- ])
- .with_env_var_secrets(envSecrets)
- .with_limits("0.5", "128M")
- .with_reservations("0.1", "128M");
-
- local containerSetRag = engine.containers(
- "text-completion-rag", [ containerRag ]
- );
-
- local serviceRag =
- engine.internalService(containerSetRag)
- .with_port(8080, 8080, "metrics");
-
- engine.resources([
- envSecrets,
- containerSetRag,
- serviceRag,
- ])
-
- },
-
-} + prompts
-
diff --git a/templates/components/openai.jsonnet b/templates/components/openai.jsonnet
deleted file mode 100644
index 9e0212d2..00000000
--- a/templates/components/openai.jsonnet
+++ /dev/null
@@ -1,59 +0,0 @@
-local base = import "base/base.jsonnet";
-local images = import "values/images.jsonnet";
-local url = import "values/url.jsonnet";
-local prompts = import "prompts/mixtral.jsonnet";
-
-{
-
- with:: function(key, value)
- self + {
- ["openai-" + key]:: value,
- },
-
- "openai-max-output-tokens":: 4096,
- "openai-temperature":: 0.0,
- "openai-model":: "GPT-3.5-Turbo",
-
- "text-completion" +: {
-
- create:: function(engine)
-
- local envSecrets = engine.envSecrets("openai-credentials")
- .with_env_var("OPENAI_TOKEN", "openai-token");
-
- local container =
- engine.container("text-completion")
- .with_image(images.trustgraph_flow)
- .with_command([
- "text-completion-openai",
- "-p",
- url.pulsar,
- "-x",
- std.toString($["openai-max-output-tokens"]),
- "-t",
- "%0.3f" % $["openai-temperature"],
- "-m",
- $["openai-model"],
- ])
- .with_env_var_secrets(envSecrets)
- .with_limits("0.5", "128M")
- .with_reservations("0.1", "128M");
-
- local containerSet = engine.containers(
- "text-completion", [ container ]
- );
-
- local service =
- engine.internalService(containerSet)
- .with_port(8080, 8080, "metrics");
-
- engine.resources([
- envSecrets,
- containerSet,
- service,
- ])
-
- },
-
-} + prompts
-
diff --git a/templates/components/pinecone.jsonnet b/templates/components/pinecone.jsonnet
deleted file mode 100644
index ede383a5..00000000
--- a/templates/components/pinecone.jsonnet
+++ /dev/null
@@ -1,153 +0,0 @@
-local base = import "base/base.jsonnet";
-local images = import "values/images.jsonnet";
-local url = import "values/url.jsonnet";
-local cassandra_hosts = "cassandra";
-
-{
-
- "pinecone-cloud":: "aws",
- "pinecone-region":: "us-east-1",
-
- "store-graph-embeddings" +: {
-
- create:: function(engine)
-
- local envSecrets = engine.envSecrets("pinecone-api-key")
- .with_env_var("PINECONE_API_KEY", "pinecone-api-key");
-
- local container =
- engine.container("store-graph-embeddings")
- .with_image(images.trustgraph_flow)
- .with_command([
- "ge-write-pinecone",
- "-p",
- url.pulsar,
- ])
- .with_env_var_secrets(envSecrets)
- .with_limits("0.5", "128M")
- .with_reservations("0.1", "128M");
-
- local containerSet = engine.containers(
- "store-graph-embeddings", [ container ]
- );
-
- local service =
- engine.internalService(containerSet)
- .with_port(8080, 8080, "metrics");
-
- engine.resources([
- envSecrets,
- containerSet,
- service,
- ])
-
- },
-
- "query-graph-embeddings" +: {
-
- create:: function(engine)
-
- local envSecrets = engine.envSecrets("pinecone-api-key")
- .with_env_var("PINECONE_API_KEY", "pinecone-api-key");
-
- local container =
- engine.container("query-graph-embeddings")
- .with_image(images.trustgraph_flow)
- .with_command([
- "ge-query-pinecone",
- "-p",
- url.pulsar,
- ])
- .with_env_var_secrets(envSecrets)
- .with_limits("0.5", "128M")
- .with_reservations("0.1", "128M");
-
- local containerSet = engine.containers(
- "query-graph-embeddings", [ container ]
- );
-
- local service =
- engine.internalService(containerSet)
- .with_port(8080, 8080, "metrics");
-
- engine.resources([
- envSecrets,
- containerSet,
- service,
- ])
-
- },
-
- "store-doc-embeddings" +: {
-
- create:: function(engine)
-
- local envSecrets = engine.envSecrets("pinecone-api-key")
- .with_env_var("PINECONE_API_KEY", "pinecone-api-key");
-
- local container =
- engine.container("store-doc-embeddings")
- .with_image(images.trustgraph_flow)
- .with_command([
- "de-write-pinecone",
- "-p",
- url.pulsar,
- ])
- .with_env_var_secrets(envSecrets)
- .with_limits("0.5", "128M")
- .with_reservations("0.1", "128M");
-
- local containerSet = engine.containers(
- "store-doc-embeddings", [ container ]
- );
-
- local service =
- engine.internalService(containerSet)
- .with_port(8080, 8080, "metrics");
-
- engine.resources([
- envSecrets,
- containerSet,
- service,
- ])
-
- },
-
- "query-doc-embeddings" +: {
-
- create:: function(engine)
-
- local envSecrets = engine.envSecrets("pinecone-api-key")
- .with_env_var("PINECONE_API_KEY", "pinecone-api-key");
-
- local container =
- engine.container("query-doc-embeddings")
- .with_image(images.trustgraph_flow)
- .with_command([
- "de-query-pinecone",
- "-p",
- url.pulsar,
- ])
- .with_env_var_secrets(envSecrets)
- .with_limits("0.5", "128M")
- .with_reservations("0.1", "128M");
-
- local containerSet = engine.containers(
- "query-doc-embeddings", [ container ]
- );
-
- local service =
- engine.internalService(containerSet)
- .with_port(8080, 8080, "metrics");
-
- engine.resources([
- envSecrets,
- containerSet,
- service,
- ])
-
-
- }
-
-}
-
diff --git a/templates/components/prompt-overrides.jsonnet b/templates/components/prompt-overrides.jsonnet
deleted file mode 100644
index 852ec09d..00000000
--- a/templates/components/prompt-overrides.jsonnet
+++ /dev/null
@@ -1,24 +0,0 @@
-local default_prompts = import "prompts/default-prompts.jsonnet";
-
-{
-
- with:: function(key, value)
- if (key == "system-template") then
- self + {
- prompts +:: {
- "system-template": value,
- }
- }
- else
- self + {
- prompts +:: {
- templates +:: {
- [key] +:: {
- prompt: value
- }
- }
- }
- },
-
-} + default_prompts
-
diff --git a/templates/components/prompt-template.jsonnet b/templates/components/prompt-template.jsonnet
deleted file mode 100644
index b3187c9b..00000000
--- a/templates/components/prompt-template.jsonnet
+++ /dev/null
@@ -1,125 +0,0 @@
-local base = import "base/base.jsonnet";
-local images = import "values/images.jsonnet";
-local url = import "values/url.jsonnet";
-local prompts = import "prompts/mixtral.jsonnet";
-local default_prompts = import "prompts/default-prompts.jsonnet";
-
-{
-
- prompts:: default_prompts,
-
- local prompt_template_args = [ "--prompt" ] + [
- p.key + "=" + p.value.prompt,
- for p in std.objectKeysValuesAll($.prompts.templates)
- ],
-
- local prompt_response_type_args = [ "--prompt-response-type" ] + [
- p.key + "=" + p.value["response-type"],
- for p in std.objectKeysValuesAll($.prompts.templates)
- if std.objectHas(p.value, "response-type")
- ],
-
- local prompt_schema_args = [ "--prompt-schema" ] + [
- (
- p.key + "=" +
- std.manifestJsonMinified(p.value["schema"])
- )
- for p in std.objectKeysValuesAll($.prompts.templates)
- if std.objectHas(p.value, "schema")
- ],
-
- local prompt_term_args = [ "--prompt-term" ] + [
- p.key + "=" + t.key + ":" + t.value
- for p in std.objectKeysValuesAll($.prompts.templates)
- if std.objectHas(p.value, "terms")
- for t in std.objectKeysValuesAll(p.value.terms)
- ],
-
- local prompt_args = prompt_template_args + prompt_response_type_args +
- prompt_schema_args + prompt_term_args,
-
- "prompt" +: {
-
- create:: function(engine)
-
- local container =
- engine.container("prompt")
- .with_image(images.trustgraph_flow)
- .with_command([
- "prompt-template",
- "-p",
- url.pulsar,
-
- "--text-completion-request-queue",
- "non-persistent://tg/request/text-completion",
- "--text-completion-response-queue",
- "non-persistent://tg/response/text-completion",
-
- "--system-prompt",
- $["prompts"]["system-template"],
-
- ] + prompt_args
- )
- .with_limits("0.5", "128M")
- .with_reservations("0.1", "128M");
-
- local containerSet = engine.containers(
- "prompt", [ container ]
- );
-
- local service =
- engine.internalService(containerSet)
- .with_port(8080, 8080, "metrics");
-
- engine.resources([
- containerSet,
- service,
- ])
-
- },
-
- "prompt-rag" +: {
-
- create:: function(engine)
-
- local container =
- engine.container("prompt-rag")
- .with_image(images.trustgraph_flow)
- .with_command([
- "prompt-template",
- "-p",
- url.pulsar,
- "-i",
- "non-persistent://tg/request/prompt-rag",
- "-o",
- "non-persistent://tg/response/prompt-rag",
- "--text-completion-request-queue",
- "non-persistent://tg/request/text-completion-rag",
- "--text-completion-response-queue",
- "non-persistent://tg/response/text-completion-rag",
-
- "--system-prompt",
- $["prompts"]["system-template"],
-
- ] + prompt_args
- )
- .with_limits("0.5", "128M")
- .with_reservations("0.1", "128M");
-
- local containerSet = engine.containers(
- "prompt-rag", [ container ]
- );
-
- local service =
- engine.internalService(containerSet)
- .with_port(8080, 8080, "metrics");
-
- engine.resources([
- containerSet,
- service,
- ])
-
- },
-
-} + default_prompts
-
diff --git a/templates/components/pulsar-manager.jsonnet b/templates/components/pulsar-manager.jsonnet
deleted file mode 100644
index 9a0b59b2..00000000
--- a/templates/components/pulsar-manager.jsonnet
+++ /dev/null
@@ -1,41 +0,0 @@
-local base = import "base/base.jsonnet";
-local images = import "values/images.jsonnet";
-
-{
-
- "pulsar" +: {
-
- create:: function(engine)
-
-// FIXME: Should persist something?
-// local volume = engine.volume(...)
-
- local container =
- engine.container("pulsar")
- .with_image(images.pulsar_manager)
- .with_environment({
- SPRING_CONFIGURATION_FILE: "/pulsar-manager/pulsar-manager/application.properties",
- })
- .with_limits("0.5", "1.4G")
- .with_reservations("0.1", "1.4G")
- .with_port(9527, 9527, "api")
- .with_port(7750, 7750, "api2");
-
- local containerSet = engine.containers(
- "pulsar", [ container ]
- );
-
- local service =
- engine.service(containerSet)
- .with_port(9527, 9527, "api")
- .with_port(7750, 7750, "api2);
-
- engine.resources([
- containerSet,
- service,
- ])
-
- }
-
-}
-
diff --git a/templates/components/pulsar.jsonnet b/templates/components/pulsar.jsonnet
deleted file mode 100644
index d111f616..00000000
--- a/templates/components/pulsar.jsonnet
+++ /dev/null
@@ -1,191 +0,0 @@
-local base = import "base/base.jsonnet";
-local images = import "values/images.jsonnet";
-local url = import "values/url.jsonnet";
-
-// This is a Pulsar configuration. Non-standalone mode so we deploy
-// individual components: bookkeeper, broker and zookeeper.
-//
-// This also deploys the TrustGraph 'admin' container which initialises
-// TrustGraph-specific namespaces etc.
-
-{
-
- "pulsar" +: {
-
- create:: function(engine)
-
- // Zookeeper volume
- local zkVolume = engine.volume("zookeeper").with_size("1G");
-
- // Zookeeper container
- local zkContainer =
- engine.container("zookeeper")
- .with_image(images.pulsar)
- .with_command([
- "bash",
- "-c",
- "bin/apply-config-from-env.py conf/zookeeper.conf && bin/generate-zookeeper-config.sh conf/zookeeper.conf && exec bin/pulsar zookeeper"
- ])
- .with_limits("1", "400M")
- .with_reservations("0.05", "400M")
- .with_user("0:1000")
- .with_volume_mount(zkVolume, "/pulsar/data/zookeeper")
- .with_environment({
- "metadataStoreUrl": "zk:zookeeper:2181",
- "PULSAR_MEM": "-Xms256m -Xmx256m -XX:MaxDirectMemorySize=256m",
- })
- .with_port(2181, 2181, "zookeeper")
- .with_port(2888, 2888, "zookeeper2")
- .with_port(3888, 3888, "zookeeper3");
-
- // Pulsar cluster init container
- local initContainer =
- engine.container("pulsar-init")
- .with_image(images.pulsar)
- .with_command([
- "bash",
- "-c",
- "sleep 10 && bin/pulsar initialize-cluster-metadata --cluster cluster-a --zookeeper zookeeper:2181 --configuration-store zookeeper:2181 --web-service-url http://pulsar:8080 --broker-service-url pulsar://pulsar:6650",
- ])
- .with_limits("1", "512M")
- .with_reservations("0.05", "512M")
- .with_environment({
- "PULSAR_MEM": "-Xms256m -Xmx256m -XX:MaxDirectMemorySize=256m",
- });
-
-
- // Bookkeeper volume
- local bookieVolume = engine.volume("bookie").with_size("20G");
-
- // Bookkeeper container
- local bookieContainer =
- engine.container("bookie")
- .with_image(images.pulsar)
- .with_command([
- "bash",
- "-c",
- "bin/apply-config-from-env.py conf/bookkeeper.conf && exec bin/pulsar bookie"
- // false ^ causes this to be a 'failure' exit.
- ])
- .with_limits("1", "800M")
- .with_reservations("0.1", "800M")
- .with_user("0:1000")
- .with_volume_mount(bookieVolume, "/pulsar/data/bookkeeper")
- .with_environment({
- "clusterName": "cluster-a",
- "zkServers": "zookeeper:2181",
- "bookieId": "bookie",
- "metadataStoreUri": "metadata-store:zk:zookeeper:2181",
- "advertisedAddress": "bookie",
- "BOOKIE_MEM": "-Xms512m -Xmx512m -XX:MaxDirectMemorySize=256m",
- })
- .with_port(3181, 3181, "bookie");
-
- // Pulsar broker, stateless (uses ZK and Bookkeeper for state)
- local brokerContainer =
- engine.container("pulsar")
- .with_image(images.pulsar)
- .with_command([
- "bash",
- "-c",
- "bin/apply-config-from-env.py conf/broker.conf && exec bin/pulsar broker"
- ])
- .with_limits("1", "800M")
- .with_reservations("0.1", "800M")
- .with_environment({
- "metadataStoreUrl": "zk:zookeeper:2181",
- "zookeeperServers": "zookeeper:2181",
- "clusterName": "cluster-a",
- "managedLedgerDefaultEnsembleSize": "1",
- "managedLedgerDefaultWriteQuorum": "1",
- "managedLedgerDefaultAckQuorum": "1",
- "advertisedAddress": "pulsar",
- "advertisedListeners": "external:pulsar://pulsar:6650,localhost:pulsar://localhost:6650",
- "PULSAR_MEM": "-Xms512m -Xmx512m -XX:MaxDirectMemorySize=256m",
- })
- .with_port(6650, 6650, "pulsar")
- .with_port(8080, 8080, "admin");
-
- // Trustgraph Pulsar initialisation
- local adminContainer =
- engine.container("init-trustgraph")
- .with_image(images.trustgraph_flow)
- .with_command([
- "tg-init-pulsar",
- "-p",
- url.pulsar_admin,
- ])
- .with_limits("1", "128M")
- .with_reservations("0.1", "128M");
-
- // Container sets
- local zkContainerSet = engine.containers(
- "zookeeper",
- [
- zkContainer,
- ]
- );
-
- local initContainerSet = engine.containers(
- "init-pulsar",
- [
- initContainer,
- ]
- );
-
- local bookieContainerSet = engine.containers(
- "bookie",
- [
- bookieContainer,
- ]
- );
-
- local brokerContainerSet = engine.containers(
- "pulsar",
- [
- brokerContainer,
- ]
- );
-
- local adminContainerSet = engine.containers(
- "init-pulsar",
- [
- adminContainer
- ]
- );
-
- // Zookeeper service
- local zkService =
- engine.service(zkContainerSet)
- .with_port(2181, 2181, "zookeeper")
- .with_port(2888, 2888, "zookeeper2")
- .with_port(3888, 3888, "zookeeper3");
-
- // Bookkeeper service
- local bookieService =
- engine.service(bookieContainerSet)
- .with_port(3181, 3181, "bookie");
-
- // Pulsar broker service
- local brokerService =
- engine.service(brokerContainerSet)
- .with_port(6650, 6650, "pulsar")
- .with_port(8080, 8080, "admin");
-
- engine.resources([
- zkVolume,
- bookieVolume,
- zkContainerSet,
- initContainerSet,
- bookieContainerSet,
- brokerContainerSet,
- adminContainerSet,
- zkService,
- bookieService,
- brokerService,
- ])
-
- }
-
-}
-
diff --git a/templates/components/qdrant.jsonnet b/templates/components/qdrant.jsonnet
deleted file mode 100644
index 352cb741..00000000
--- a/templates/components/qdrant.jsonnet
+++ /dev/null
@@ -1,139 +0,0 @@
-local base = import "base/base.jsonnet";
-local images = import "values/images.jsonnet";
-local url = import "values/url.jsonnet";
-local cassandra_hosts = "cassandra";
-local qdrant = import "stores/qdrant.jsonnet";
-
-qdrant + {
-
- "store-graph-embeddings" +: {
-
- create:: function(engine)
-
- local container =
- engine.container("store-graph-embeddings")
- .with_image(images.trustgraph_flow)
- .with_command([
- "ge-write-qdrant",
- "-p",
- url.pulsar,
- "-t",
- url.qdrant,
- ])
- .with_limits("0.5", "128M")
- .with_reservations("0.1", "128M");
-
- local containerSet = engine.containers(
- "store-graph-embeddings", [ container ]
- );
-
- local service =
- engine.internalService(containerSet)
- .with_port(8080, 8080, "metrics");
-
- engine.resources([
- containerSet,
- service,
- ])
-
- },
-
- "query-graph-embeddings" +: {
-
- create:: function(engine)
-
- local container =
- engine.container("query-graph-embeddings")
- .with_image(images.trustgraph_flow)
- .with_command([
- "ge-query-qdrant",
- "-p",
- url.pulsar,
- "-t",
- url.qdrant,
- ])
- .with_limits("0.5", "128M")
- .with_reservations("0.1", "128M");
-
- local containerSet = engine.containers(
- "query-graph-embeddings", [ container ]
- );
-
- local service =
- engine.internalService(containerSet)
- .with_port(8080, 8080, "metrics");
-
- engine.resources([
- containerSet,
- service,
- ])
-
- },
-
- "store-doc-embeddings" +: {
-
- create:: function(engine)
-
- local container =
- engine.container("store-doc-embeddings")
- .with_image(images.trustgraph_flow)
- .with_command([
- "de-write-qdrant",
- "-p",
- url.pulsar,
- "-t",
- url.qdrant,
- ])
- .with_limits("0.5", "128M")
- .with_reservations("0.1", "128M");
-
- local containerSet = engine.containers(
- "store-doc-embeddings", [ container ]
- );
-
- local service =
- engine.internalService(containerSet)
- .with_port(8080, 8080, "metrics");
-
- engine.resources([
- containerSet,
- service,
- ])
-
- },
-
- "query-doc-embeddings" +: {
-
- create:: function(engine)
-
- local container =
- engine.container("query-doc-embeddings")
- .with_image(images.trustgraph_flow)
- .with_command([
- "de-query-qdrant",
- "-p",
- url.pulsar,
- "-t",
- url.qdrant,
- ])
- .with_limits("0.5", "128M")
- .with_reservations("0.1", "128M");
-
- local containerSet = engine.containers(
- "query-doc-embeddings", [ container ]
- );
-
- local service =
- engine.internalService(containerSet)
- .with_port(8080, 8080, "metrics");
-
- engine.resources([
- containerSet,
- service,
- ])
-
-
- }
-
-}
-
diff --git a/templates/components/trustgraph.jsonnet b/templates/components/trustgraph.jsonnet
deleted file mode 100644
index 833d932b..00000000
--- a/templates/components/trustgraph.jsonnet
+++ /dev/null
@@ -1,184 +0,0 @@
-local base = import "base/base.jsonnet";
-local images = import "values/images.jsonnet";
-local url = import "values/url.jsonnet";
-
-{
-
- "api-gateway-port":: 8088,
- "api-gateway-timeout":: 600,
-
- "chunk-size":: 250,
- "chunk-overlap":: 15,
-
- "api-gateway" +: {
-
- create:: function(engine)
-
- local envSecrets = engine.envSecrets("gateway-secret")
- .with_env_var("GATEWAY_SECRET", "gateway-secret");
-
- local port = $["api-gateway-port"];
-
- local container =
- engine.container("api-gateway")
- .with_image(images.trustgraph_flow)
- .with_command([
- "api-gateway",
- "-p",
- url.pulsar,
- "--timeout",
- std.toString($["api-gateway-timeout"]),
- "--port",
- std.toString(port),
- ])
- .with_env_var_secrets(envSecrets)
- .with_limits("0.5", "256M")
- .with_reservations("0.1", "256M")
- .with_port(8000, 8000, "metrics")
- .with_port(port, port, "api");
-
- local containerSet = engine.containers(
- "api-gateway", [ container ]
- );
-
- local service =
- engine.internalService(containerSet)
- .with_port(8000, 8000, "metrics")
- .with_port(port, port, "api");
-
- engine.resources([
- envSecrets,
- containerSet,
- service,
- ])
-
- },
-
- "chunker" +: {
-
- create:: function(engine)
-
- local container =
- engine.container("chunker")
- .with_image(images.trustgraph_flow)
- .with_command([
- "chunker-token",
- "-p",
- url.pulsar,
- "--chunk-size",
- std.toString($["chunk-size"]),
- "--chunk-overlap",
- std.toString($["chunk-overlap"]),
- ])
- .with_limits("0.5", "128M")
- .with_reservations("0.1", "128M");
-
- local containerSet = engine.containers(
- "chunker", [ container ]
- );
-
- local service =
- engine.internalService(containerSet)
- .with_port(8000, 8000, "metrics");
-
- engine.resources([
- containerSet,
- service,
- ])
-
- },
-
- "pdf-decoder" +: {
-
- create:: function(engine)
-
- local container =
- engine.container("pdf-decoder")
- .with_image(images.trustgraph_flow)
- .with_command([
- "pdf-decoder",
- "-p",
- url.pulsar,
- ])
- .with_limits("0.5", "128M")
- .with_reservations("0.1", "128M");
-
- local containerSet = engine.containers(
- "pdf-decoder", [ container ]
- );
-
- local service =
- engine.internalService(containerSet)
- .with_port(8000, 8000, "metrics");
-
- engine.resources([
- containerSet,
- service,
- ])
-
- },
-
- "metering" +: {
-
- create:: function(engine)
-
- local container =
- engine.container("metering")
- .with_image(images.trustgraph_flow)
- .with_command([
- "metering",
- "-p",
- url.pulsar,
- ])
- .with_limits("0.5", "128M")
- .with_reservations("0.1", "128M");
-
- local containerSet = engine.containers(
- "metering", [ container ]
- );
-
- local service =
- engine.internalService(containerSet)
- .with_port(8000, 8000, "metrics");
-
- engine.resources([
- containerSet,
- service,
- ])
-
- },
-
- "metering-rag" +: {
-
- create:: function(engine)
-
- local container =
- engine.container("metering-rag")
- .with_image(images.trustgraph_flow)
- .with_command([
- "metering",
- "-p",
- url.pulsar,
- "-i",
- "non-persistent://tg/response/text-completion-rag",
- ])
- .with_limits("0.5", "128M")
- .with_reservations("0.1", "128M");
-
- local containerSet = engine.containers(
- "metering-rag", [ container ]
- );
-
- local service =
- engine.internalService(containerSet)
- .with_port(8000, 8000, "metrics");
-
- engine.resources([
- containerSet,
- service,
- ])
-
- },
-
-}
-
diff --git a/templates/components/vertexai-rag.jsonnet b/templates/components/vertexai-rag.jsonnet
deleted file mode 100644
index 0b5cf9a3..00000000
--- a/templates/components/vertexai-rag.jsonnet
+++ /dev/null
@@ -1,74 +0,0 @@
-local base = import "base/base.jsonnet";
-local images = import "values/images.jsonnet";
-local url = import "values/url.jsonnet";
-local prompts = import "prompts/mixtral.jsonnet";
-
-{
-
- with:: function(key, value)
- self + {
- ["vertexai-rag-" + key]:: value,
- },
-
- "vertexai-rag-model":: "gemini-1.0-pro-001",
- "vertexai-rag-private-key":: "/vertexai/private.json",
- "vertexai-rag-region":: "us-central1",
- "vertexai-rag-max-output-tokens":: 4096,
- "vertexai-rag-temperature":: 0.0,
-
- "text-completion-rag" +: {
-
- create:: function(engine)
-
- local cfgVol = engine.secretVolume(
- "vertexai-creds",
- "./vertexai",
- {
- "private.json": importstr "vertexai/private.json",
- }
- );
-
- local container =
- engine.container("text-completion-rag")
- .with_image(images.trustgraph_vertexai)
- .with_command([
- "text-completion-vertexai",
- "-p",
- url.pulsar,
- "-k",
- $["vertexai-rag-private-key"],
- "-r",
- $["vertexai-rag-region"],
- "-x",
- std.toString($["vertexai-rag-max-output-tokens"]),
- "-t",
- "%0.3f" % $["vertexai-rag-temperature"],
- "-m",
- $["vertexai-rag-model"],
- "-i",
- "non-persistent://tg/request/text-completion-rag",
- "-o",
- "non-persistent://tg/response/text-completion-rag",
- ])
- .with_limits("0.5", "256M")
- .with_reservations("0.1", "256M")
- .with_volume_mount(cfgVol, "/vertexai");
-
- local containerSet = engine.containers(
- "text-completion-rag", [ container ]
- );
-
- local service =
- engine.internalService(containerSet)
- .with_port(8000, 8000, "metrics");
-
- engine.resources([
- cfgVol,
- containerSet,
- service,
- ])
-
- }
-
-} + prompts
-
diff --git a/templates/components/vertexai.jsonnet b/templates/components/vertexai.jsonnet
deleted file mode 100644
index 0e3550c5..00000000
--- a/templates/components/vertexai.jsonnet
+++ /dev/null
@@ -1,70 +0,0 @@
-local base = import "base/base.jsonnet";
-local images = import "values/images.jsonnet";
-local url = import "values/url.jsonnet";
-local prompts = import "prompts/mixtral.jsonnet";
-
-{
-
- with:: function(key, value)
- self + {
- ["vertexai-" + key]:: value,
- },
-
- "vertexai-model":: "gemini-1.0-pro-001",
- "vertexai-private-key":: "/vertexai/private.json",
- "vertexai-region":: "us-central1",
- "vertexai-max-output-tokens":: 4096,
- "vertexai-temperature":: 0.0,
-
- "text-completion" +: {
-
- create:: function(engine)
-
- local cfgVol = engine.secretVolume(
- "vertexai-creds",
- "./vertexai",
- {
- "private.json": importstr "vertexai/private.json",
- }
- );
-
- local container =
- engine.container("text-completion")
- .with_image(images.trustgraph_vertexai)
- .with_command([
- "text-completion-vertexai",
- "-p",
- url.pulsar,
- "-k",
- $["vertexai-private-key"],
- "-r",
- $["vertexai-region"],
- "-x",
- std.toString($["vertexai-max-output-tokens"]),
- "-t",
- "%0.3f" % $["vertexai-temperature"],
- "-m",
- $["vertexai-model"],
- ])
- .with_limits("0.5", "256M")
- .with_reservations("0.1", "256M")
- .with_volume_mount(cfgVol, "/vertexai");
-
- local containerSet = engine.containers(
- "text-completion", [ container ]
- );
-
- local service =
- engine.internalService(containerSet)
- .with_port(8000, 8000, "metrics");
-
- engine.resources([
- cfgVol,
- containerSet,
- service,
- ])
-
- },
-
-} + prompts
-
diff --git a/templates/components/workbench-ui.jsonnet b/templates/components/workbench-ui.jsonnet
deleted file mode 100644
index f2048e47..00000000
--- a/templates/components/workbench-ui.jsonnet
+++ /dev/null
@@ -1,32 +0,0 @@
-local images = import "values/images.jsonnet";
-
-{
-
- "workbench-ui" +: {
-
- create:: function(engine)
-
- local container =
- engine.container("workbench-ui")
- .with_image(images["workbench-ui"])
- .with_limits("0.1", "256M")
- .with_reservations("0.1", "256M")
- .with_port(8888, 8888, "ui");
-
- local containerSet = engine.containers(
- "workbench-ui", [ container ]
- );
-
- local service =
- engine.internalService(containerSet)
- .with_port(8888, 8888, "ui");
-
- engine.resources([
- containerSet,
- service,
- ])
-
- },
-
-}
-
diff --git a/templates/config-to-aks-k8s.jsonnet b/templates/config-to-aks-k8s.jsonnet
deleted file mode 100644
index c603a0d8..00000000
--- a/templates/config-to-aks-k8s.jsonnet
+++ /dev/null
@@ -1,16 +0,0 @@
-
-local engine = import "engine/aks-k8s.jsonnet";
-local decode = import "util/decode-config.jsonnet";
-local components = import "components.jsonnet";
-
-// Import config
-local config = import "config.json";
-
-// Produce patterns from config
-local patterns = decode(config);
-
-// Extract resources usnig the engine
-local resourceList = engine.package(patterns);
-
-resourceList
-
diff --git a/templates/config-to-docker-compose.jsonnet b/templates/config-to-docker-compose.jsonnet
deleted file mode 100644
index 442d2cb7..00000000
--- a/templates/config-to-docker-compose.jsonnet
+++ /dev/null
@@ -1,20 +0,0 @@
-
-local engine = import "engine/docker-compose.jsonnet";
-local decode = import "util/decode-config.jsonnet";
-local components = import "components.jsonnet";
-
-// Import config
-local config = import "config.json";
-
-// Produce patterns from config
-local patterns = decode(config);
-
-// Extract resources usnig the engine
-local resources = std.foldl(
- function(state, p) state + p.create(engine),
- std.objectValues(patterns),
- {}
-);
-
-resources
-
diff --git a/templates/config-to-gcp-k8s.jsonnet b/templates/config-to-gcp-k8s.jsonnet
deleted file mode 100644
index 3d089a24..00000000
--- a/templates/config-to-gcp-k8s.jsonnet
+++ /dev/null
@@ -1,16 +0,0 @@
-
-local engine = import "engine/gcp-k8s.jsonnet";
-local decode = import "util/decode-config.jsonnet";
-local components = import "components.jsonnet";
-
-// Import config
-local config = import "config.json";
-
-// Produce patterns from config
-local patterns = decode(config);
-
-// Extract resources usnig the engine
-local resourceList = engine.package(patterns);
-
-resourceList
-
diff --git a/templates/config-to-minikube-k8s.jsonnet b/templates/config-to-minikube-k8s.jsonnet
deleted file mode 100644
index 073358cb..00000000
--- a/templates/config-to-minikube-k8s.jsonnet
+++ /dev/null
@@ -1,26 +0,0 @@
-
-local engine = import "engine/minikube-k8s.jsonnet";
-local decode = import "util/decode-config.jsonnet";
-local components = import "components.jsonnet";
-
-// Import config
-local config = import "config.json";
-
-// Produce patterns from config
-local patterns = decode(config);
-
-local ns = {
- apiVersion: "v1",
- kind: "Namespace",
- metadata: {
- name: "trustgraph",
- },
- "spec": {
- },
-};
-
-// Extract resources using the engine
-local resourceList = engine.package(patterns);
-
-resourceList
-
diff --git a/templates/engine/aks-k8s.jsonnet b/templates/engine/aks-k8s.jsonnet
deleted file mode 100644
index 69bca03f..00000000
--- a/templates/engine/aks-k8s.jsonnet
+++ /dev/null
@@ -1,45 +0,0 @@
-
-local k8s = import "k8s.jsonnet";
-
-local ns = {
- apiVersion: "v1",
- kind: "Namespace",
- metadata: {
- name: "trustgraph",
- },
- "spec": {
- },
-};
-
-local sc = {
- apiVersion: "storage.k8s.io/v1",
- kind: "StorageClass",
- metadata: {
- name: "tg",
- },
- provisioner: "disk.csi.azure.com",
- parameters: {
- // Standard disks (spinning magnetic), Locally Redundant Storage
- // Cheapest, basically
- skuName: "Standard_LRS",
- },
- reclaimPolicy: "Delete",
- volumeBindingMode: "WaitForFirstConsumer",
-};
-
-k8s + {
-
- // Extract resources usnig the engine
- package:: function(patterns)
- local resources = [sc, ns] + std.flattenArrays([
- p.create(self) for p in std.objectValues(patterns)
- ]);
- local resourceList = {
- apiVersion: "v1",
- kind: "List",
- items: [ns, sc] + resources,
- };
- resourceList
-
-}
-
diff --git a/templates/engine/docker-compose.jsonnet b/templates/engine/docker-compose.jsonnet
deleted file mode 100644
index 0be3c3e3..00000000
--- a/templates/engine/docker-compose.jsonnet
+++ /dev/null
@@ -1,237 +0,0 @@
-{
-
- // Extract resources usnig the engine
- package:: function(patterns)
- std.foldl(
- function(state, p) state + p.create(self),
- std.objectValues(patterns),
- {}
- ),
-
- container:: function(name)
- {
-
- local container = self,
-
- name: name,
- limits: {},
- reservations: {},
- ports: [],
- volumes: [],
- environment: {},
-
- with_image:: function(x) self + { image: x },
-
- with_user:: function(x) self + { user: x },
-
- with_command:: function(x) self + { command: x },
-
- with_environment:: function(x) self + {
- environment: super.environment + x,
- },
-
- with_limits:: function(c, m) self + { limits: { cpus: c, memory: m } },
-
- with_reservations::
- function(c, m) self + { reservations: { cpus: c, memory: m } },
-
- with_volume_mount::
- function(vol, mnt)
- self + {
- volumes: super.volumes + [{
- volume: vol, mount: mnt
- }]
- },
-
- with_port::
- function(src, dest, name) self + {
- ports: super.ports + [
- { src: src, dest: dest, name : name }
- ]
- },
-
- with_env_var_secrets::
- function(vars)
- std.foldl(
- function(obj, x) obj.with_environment(
- { [x]: "${" + x + "}" }
- ),
- vars.variables,
- self
- ),
-
- add:: function() {
- services +: {
- [container.name]: {
- image: container.image,
- deploy: {
- resources: {
- limits: container.limits,
- reservations: container.reservations,
- }
- },
- restart: "on-failure:100",
- } +
-
- (if std.objectHas(container, "command") then
- { command: container.command }
- else {}) +
-
- (if std.objectHas(container, "user") then
- { user: container.user }
- else {}) +
-
- (if ! std.isEmpty(container.environment) then
- { environment: container.environment }
- else {}) +
-
- (if std.length(container.ports) > 0 then
- {
- ports: [
- "%d:%d" % [port.src, port.dest]
- for port in container.ports
- ]
- }
- else {}) +
-
- (if std.length(container.volumes) > 0 then
- {
- volumes: [
- "%s:%s" % [vol.volume.volid, vol.mount]
- for vol in container.volumes
- ]
- }
- else {})
-
- }
- }
-
- },
-
- internalService:: function(containers)
- {
-
- local service = self,
-
- name: containers.name,
-
- with_port:: function(src, dest, name)
- self + { port: [src, dest] },
-
- add:: function() {
- }
-
- },
-
- service:: function(containers)
- {
-
- local service = self,
-
- name: containers.name,
-
- with_port:: function(src, dest, name)
- self + { port: [src, dest] },
-
- add:: function() {
- }
-
- },
-
- volume:: function(name)
- {
-
- local volume = self,
-
- name: name,
-
- volid:: name,
-
- with_size:: function(size) self + { size: size },
-
- add:: function() {
- volumes +: {
- [volume.name]: {}
- }
- }
-
- },
-
- configVolume:: function(name, dir, parts)
- {
-
- local volume = self,
-
- name: dir,
-
- volid:: "./" + dir,
-
- with_size:: function(size) self + { size: size },
-
- add:: function() {
- }
-
- },
-
- secretVolume:: function(name, dir, parts)
- {
-
- local volume = self,
-
- name: dir,
-
- volid:: dir,
-
- with_size:: function(size) self + { size: size },
-
- add:: function() {
- }
-
- },
-
- envSecrets:: function(name)
- {
-
- local volume = self,
-
- name: name,
-
- volid:: name,
-
- variables:: [],
-
- with_env_var::
- function(name, key) self + {
- variables: super.variables + [name],
- },
-
- add:: function() {
- }
-
- },
-
- containers:: function(name, containers)
- {
-
- local cont = self,
-
- name: name,
- containers: containers,
-
- add:: function() std.foldl(
- function(state, c) state + c.add(),
- cont.containers,
- {}
- ),
-
- },
-
- resources:: function(res)
- std.foldl(
- function(state, c) state + c.add(),
- res,
- {}
- ),
-
-}
-
diff --git a/templates/engine/gcp-k8s.jsonnet b/templates/engine/gcp-k8s.jsonnet
deleted file mode 100644
index 0605a570..00000000
--- a/templates/engine/gcp-k8s.jsonnet
+++ /dev/null
@@ -1,44 +0,0 @@
-
-local k8s = import "k8s.jsonnet";
-
-local ns = {
- apiVersion: "v1",
- kind: "Namespace",
- metadata: {
- name: "trustgraph",
- },
- "spec": {
- },
-};
-
-local sc = {
- apiVersion: "storage.k8s.io/v1",
- kind: "StorageClass",
- metadata: {
- name: "tg",
- },
- provisioner: "pd.csi.storage.gke.io",
- parameters: {
- type: "pd-balanced",
- "csi.storage.k8s.io/fstype": "ext4",
- },
- reclaimPolicy: "Delete",
- volumeBindingMode: "WaitForFirstConsumer",
-};
-
-k8s + {
-
- // Extract resources usnig the engine
- package:: function(patterns)
- local resources = [sc, ns] + std.flattenArrays([
- p.create(self) for p in std.objectValues(patterns)
- ]);
- local resourceList = {
- apiVersion: "v1",
- kind: "List",
- items: [ns, sc] + resources,
- };
- resourceList
-
-}
-
diff --git a/templates/engine/k8s.jsonnet b/templates/engine/k8s.jsonnet
deleted file mode 100644
index dfd8d11f..00000000
--- a/templates/engine/k8s.jsonnet
+++ /dev/null
@@ -1,358 +0,0 @@
-{
-
- container:: function(name)
- {
-
- local container = self,
-
- name: name,
- limits: {},
- reservations: {},
- ports: [],
- volumes: [],
- environment: [],
-
- with_image:: function(x) self + { image: x },
-
- with_user:: function(x) self + { user: x },
-
- with_command:: function(x) self + { command: x },
-
- with_environment:: function(x) self + {
- environment: super.environment + [
- {
- name: v.key, value: v.value
- }
- for v in std.objectKeysValues(x)
- ],
- },
-
- with_limits:: function(c, m) self + { limits: { cpu: c, memory: m } },
-
- with_reservations::
- function(c, m) self + { reservations: { cpu: c, memory: m } },
-
- with_volume_mount::
- function(vol, mnt)
- self + {
- volumes: super.volumes + [{
- volume: vol, mount: mnt
- }]
- },
-
- with_port::
- function(src, dest, name) self + {
- ports: super.ports + [
- { src: src, dest: dest, name : name }
- ]
- },
-
- with_env_var_secrets::
- function(vars)
- std.foldl(
- function(obj, x) obj + {
- environment: super.environment + [{
- name: x,
- valueFrom: {
- secretKeyRef: {
- name: vars.name,
- key: vars.keyMap[x],
- }
- }
- }]
- },
- vars.variables,
- self
- ),
-
- add:: function() [
-
- {
- apiVersion: "apps/v1",
- kind: "Deployment",
- metadata: {
- name: container.name,
- namespace: "trustgraph",
- labels: {
- app: container.name
- }
- },
- spec: {
- replicas: 1,
- selector: {
- matchLabels: {
- app: container.name,
- }
- },
- template: {
- metadata: {
- labels: {
- app: container.name,
- }
- },
- spec: {
- containers: [
- {
- name: container.name,
- image: container.image,
-
- // FIXME: Make everything run as
- // root. Needed to get filesystems
- // to be accessible. There's a
- // better way of doing this?
- securityContext: {
- runAsUser: 0,
- runAsGroup: 0,
- },
-
- resources: {
- requests: container.reservations,
- limits: container.limits
- },
- } + (
- if std.length(container.ports) > 0 then
- {
- ports: [
- {
- hostPort: port.src,
- containerPort: port.dest,
- }
- for port in container.ports
- ]
- } else
- {}) +
-
- (if std.objectHas(container, "command") then
- { command: container.command }
- else {}) +
-
- (if ! std.isEmpty(container.environment) then
- {
- env: container.environment,
- }
- else {}) +
-
- (if std.length(container.volumes) > 0 then
- {
- volumeMounts: [
- {
- mountPath: vol.mount,
- name: vol.volume.name,
- }
- for vol in container.volumes
- ]
- }
-
- else
- {}
- )
- ],
- volumes: [
- vol.volume.volRef()
- for vol in container.volumes
-
- ]
- }
- },
- } + {}
-
- }
-
- ]
-
- },
-
- // Just an alias
- internalService:: self.service,
-
- service:: function(containers)
- {
-
- local service = self,
-
- name: containers.name,
-
- ports: [],
-
- with_port::
- function(src, dest, name)
- self + {
- ports: super.ports + [
- { src: src, dest: dest, name: name }
- ]
- },
-
- add:: function() [
-
- {
-
- apiVersion: "v1",
- kind: "Service",
- metadata: {
- name: service.name,
- namespace: "trustgraph",
- },
- spec: {
- selector: {
- app: service.name,
- },
- ports: [
- {
- port: port.src,
- targetPort: port.dest,
- name: port.name,
- }
- for port in service.ports
- ],
- }
- }
- ],
-
- },
-
- volume:: function(name)
- {
-
- local volume = self,
-
- name: name,
-
- with_size:: function(size) self + { size: size },
-
- add:: function() [
- {
- apiVersion: "v1",
- kind: "PersistentVolumeClaim",
- metadata: {
- name: volume.name,
- namespace: "trustgraph",
- },
- spec: {
- storageClassName: "tg",
- accessModes: [ "ReadWriteOnce" ],
- resources: {
- requests: {
- storage: volume.size,
- }
- },
- }
- }
- ],
-
- volRef:: function() {
- name: volume.name,
- persistentVolumeClaim: { claimName: volume.name },
- }
-
- },
-
- configVolume:: function(name, dir, parts)
- {
-
- local volume = self,
-
- name: name,
-
- with_size:: function(size) self + { size: size },
-
- add:: function() [
- {
- apiVersion: "v1",
- kind: "ConfigMap",
- metadata: {
- name: volume.name,
- namespace: "trustgraph",
- },
- data: parts
- },
- ],
-
-
- volRef:: function() {
- name: volume.name,
- configMap: { name: volume.name },
- }
-
- },
-
- secretVolume:: function(name, dir, parts)
- {
-
- local volume = self,
-
- name: name,
-
- with_size:: function(size) self + { size: size },
-
- add:: function() [
- {
- apiVersion: "v1",
- kind: "Secret",
- metadata: {
- name: volume.name,
- namespace: "trustgraph",
- },
- data: {
- [item.key]: std.base64(item.value)
- for item in std.objectKeysValues(parts)
- }
- },
- ],
-
- volRef:: function() {
- name: volume.name,
- secret: { secretName: volume.name },
- }
-
- },
-
- envSecrets:: function(name)
- {
-
- local volume = self,
-
- name: name,
-
- variables: [],
- keyMap: {},
-
- with_size:: function(size) self + { size: size },
-
- add:: function() [
- ],
-
- volRef:: function() {
- name: volume.name,
- secret: { secretName: volume.name },
- },
-
- with_env_var::
- function(name, key) self + {
- variables: super.variables + [name],
- keyMap: super.keyMap + { [name]: key },
- },
-
- },
-
- containers:: function(name, containers)
- {
-
- local cont = self,
-
- name: name,
- containers: containers,
-
- add:: function() std.flattenArrays(
- [ c.add() for c in cont.containers ]
- ),
-
- },
-
- resources:: function(res)
-
- std.flattenArrays(
- [ c.add() for c in res ]
- ),
-
-}
-
diff --git a/templates/engine/minikube-k8s.jsonnet b/templates/engine/minikube-k8s.jsonnet
deleted file mode 100644
index 858b17ad..00000000
--- a/templates/engine/minikube-k8s.jsonnet
+++ /dev/null
@@ -1,115 +0,0 @@
-
-local k8s = import "k8s.jsonnet";
-
-local ns = {
- apiVersion: "v1",
- kind: "Namespace",
- metadata: {
- name: "trustgraph",
- },
- "spec": {
- },
-};
-
-k8s + {
-
- // Extract resources usnig the engine
- package:: function(patterns)
- local resources = [ns] + std.flattenArrays([
- p.create(self) for p in std.objectValues(patterns)
- ]);
- local resourceList = {
- apiVersion: "v1",
- kind: "List",
- items: resources,
- };
- resourceList,
-
- volume:: function(name)
- {
- local volume = self,
- name: name,
- with_size:: function(size) self + { size: size },
- add:: function() [
- {
- apiVersion: "v1",
- kind: "PersistentVolume",
- metadata: {
- name: volume.name,
- },
- spec: {
- accessModes: [ "ReadWriteOnce" ],
- capacity: {
- storage: volume.size,
- },
- persistentVolumeReclaimPolicy: "Delete",
- hostPath: {
- path: "/data/pv-" + volume.name,
- },
- }
- },
- {
- apiVersion: "v1",
- kind: "PersistentVolumeClaim",
- metadata: {
- name: volume.name,
- namespace: "trustgraph",
- },
- spec: {
- accessModes: [ "ReadWriteOnce" ],
- resources: {
- requests: {
- storage: volume.size,
- }
- },
- }
- }
- ],
-
- volRef:: function() {
- name: volume.name,
- persistentVolumeClaim: { claimName: volume.name },
- }
-
- },
-
- service:: function(containers)
- {
- local service = self,
- name: containers.name,
- ports: [],
- with_port::
- function(src, dest, name)
- self + {
- ports: super.ports + [
- { src: src, dest: dest, name: name }
- ]
- },
- add:: function() [
- {
- apiVersion: "v1",
- kind: "Service",
- metadata: {
- name: service.name,
- namespace: "trustgraph",
- },
- spec: {
- selector: {
- app: service.name,
- },
- type: "LoadBalancer",
- ports: [
- {
- port: port.src,
- targetPort: port.dest,
- name: port.name,
- }
- for port in service.ports
- ],
- }
- }
- ],
- },
-
-}
-
diff --git a/templates/generate b/templates/generate
deleted file mode 100755
index 2640a125..00000000
--- a/templates/generate
+++ /dev/null
@@ -1,235 +0,0 @@
-#!/usr/bin/env python3
-
-import _jsonnet as j
-import json
-import yaml
-import logging
-import os
-import sys
-import zipfile
-import pathlib
-from io import BytesIO
-
-logger = logging.getLogger("generate")
-logging.basicConfig(level=logging.INFO, format='%(message)s')
-
-private_json = "Put your GCP private.json here"
-
-class Generator:
-
- def __init__(
- self, config, templates="./templates/", resources="./resources",
- version="0.0.0",
- ):
-
- self.templates = pathlib.Path(templates)
- self.resources = pathlib.Path(resources)
- self.config = config
- self.version = f"\"{version}\"".encode("utf-8")
-
- def process(self, config):
-
- res = j.evaluate_snippet("config", config, import_callback=self.load)
- return json.loads(res)
-
- def load(self, dir, filename):
-
- logger.debug("Request jsonnet: %s %s", dir, filename)
-
- if filename == "config.json" and dir == "":
- path = os.path.join(".", dir, filename)
- return str(path), self.config
-
- if filename == "version.jsonnet" and dir == "templates/values/":
- path = os.path.join(".", dir, filename)
- return str(path), self.version
-
- if dir:
- candidates = [
- self.templates.joinpath(dir, filename),
- self.templates.joinpath(filename),
- self.resources.joinpath(dir, filename),
- self.resources.joinpath(filename),
- pathlib.Path(dir).joinpath(filename),
- ]
- else:
- candidates = [
- self.templates.joinpath(filename),
- pathlib.Path(dir).joinpath(filename),
- pathlib.Path(filename),
- ]
-
- try:
-
- if filename == "vertexai/private.json":
-
- return str(candidates[0]), private_json.encode("utf-8")
-
- for c in candidates:
- logger.debug("Try: %s", c)
-
- if os.path.isfile(c):
- with open(c, "rb") as f:
- logger.debug("Loading: %s", c)
- return str(c), f.read()
-
- raise RuntimeError(
- f"Could not load file={filename} dir={dir}"
- )
-
- except:
-
- path = os.path.join(self.templates, filename)
- logger.debug("Try: %s", path)
- with open(path, "rb") as f:
- logger.debug("Loaded: %s", path)
- return str(path), f.read()
-
-class Packager:
-
- def __init__(self):
- self.templates = pathlib.Path("./templates")
- self.resources = pathlib.Path("./")
-
- def process(
- self, config, version="0.0.0", platform="docker-compose",
- ):
-
- config = config.encode("utf-8")
-
- gen = Generator(
- config, templates=self.templates, resources=self.resources,
- version=version
- )
-
- path = self.templates.joinpath(
- f"config-to-{platform}.jsonnet"
- )
- wrapper = path.read_text()
-
- processed = gen.process(wrapper)
-
- return processed
-
- def generate(self, config, version, platform):
-
- logger.info(f"Generating for platform={platform} version={version}")
-
- try:
-
- if platform in set(["docker-compose", "podman-compose"]):
- return self.generate_docker_compose(
- "docker-compose", version, config
- )
- elif platform in set(["minikube-k8s", "gcp-k8s", "aks-k8s"]):
- return self.generate_k8s(
- platform, version, config
- )
- else:
- raise RuntimeError("Bad configuration")
-
- except Exception as e:
- logging.error(f"Exception: {e}")
- raise e
-
- def generate_docker_compose(self, platform, version, config):
-
- processed = self.process(
- config, platform=platform, version=version
- )
-
- y = yaml.dump(processed)
-
- mem = BytesIO()
-
- with zipfile.ZipFile(mem, mode='w') as out:
-
- def output(name, content):
- logger.info(f"Adding {name}...")
- out.writestr(name, content)
-
- fname = "docker-compose.yaml"
-
- output(fname, y)
-
- # Grafana config
- path = self.resources.joinpath(
- "grafana/dashboards/dashboard.json"
- )
- res = path.read_text()
- output("grafana/dashboards/dashboard.json", res)
-
- path = self.resources.joinpath(
- "grafana/provisioning/dashboard.yml"
- )
- res = path.read_text()
- output("grafana/provisioning/dashboard.yml", res)
-
- path = self.resources.joinpath(
- "grafana/provisioning/datasource.yml"
- )
- res = path.read_text()
- output("grafana/provisioning/datasource.yml", res)
-
- # Prometheus config
- path = self.resources.joinpath(
- "prometheus/prometheus.yml"
- )
- res = path.read_text()
- output("prometheus/prometheus.yml", res)
-
- logger.info("Generation complete.")
-
- return mem.getvalue()
-
- def generate_k8s(self, platform, version, config):
-
- processed = self.process(
- config, platform=platform, version=version
- )
-
- y = yaml.dump(processed)
-
- mem = BytesIO()
-
- with zipfile.ZipFile(mem, mode='w') as out:
-
- def output(name, content):
- logger.info(f"Adding {name}...")
- out.writestr(name, content)
-
- fname = "resources.yaml"
-
- output(fname, y)
-
- logger.info("Generation complete.")
-
- return mem.getvalue()
-
-def main():
-
- if len(sys.argv) != 4:
- print()
- print("Usage:")
- print(" generate < input.json")
- print()
- sys.exit(1)
-
- outfile = sys.argv[1]
- version = sys.argv[2]
- platform = sys.argv[3]
-
- cfg = sys.stdin.read()
-
- logger.info(f"Outputting to {outfile}...")
-
- p = Packager()
- resp = p.generate(cfg, version, platform)
-
- with open(outfile, "wb") as f:
- f.write(resp)
-
- return
-
-main()
-
diff --git a/templates/generate-all b/templates/generate-all
deleted file mode 100755
index fb1fe917..00000000
--- a/templates/generate-all
+++ /dev/null
@@ -1,195 +0,0 @@
-#!/usr/bin/env python3
-
-import _jsonnet as j
-import json
-import yaml
-import logging
-import os
-import sys
-import zipfile
-
-logger = logging.getLogger("generate")
-logging.basicConfig(level=logging.INFO, format='%(message)s')
-
-private_json = "Put your GCP private.json here"
-
-class Generator:
-
- def __init__(self, config, base="./templates/", version="0.0.0"):
-
- self.jsonnet_base = base
- self.config = config
- self.version = f"\"{version}\"".encode("utf-8")
-
- def process(self, config):
-
- res = j.evaluate_snippet("config", config, import_callback=self.load)
- return json.loads(res)
-
- def load(self, dir, filename):
-
- logger.debug("Request jsonnet: %s %s", dir, filename)
-
- if filename == "config.json" and dir == "":
- path = os.path.join(".", dir, filename)
- return str(path), self.config
-
- if filename == "version.jsonnet" and dir == "./templates/values/":
- path = os.path.join(".", dir, filename)
- return str(path), self.version
-
- if dir:
- candidates = [
- os.path.join(".", dir, filename),
- os.path.join(".", filename)
- ]
- else:
- candidates = [
- os.path.join(".", filename)
- ]
-
- try:
-
- if filename == "vertexai/private.json":
-
- return candidates[0], private_json.encode("utf-8")
-
- for c in candidates:
- logger.debug("Try: %s", c)
-
- if os.path.isfile(c):
- with open(c, "rb") as f:
- logger.debug("Loading: %s", c)
- return str(c), f.read()
-
- raise RuntimeError(
- f"Could not load file={filename} dir={dir}"
- )
-
- except:
-
- path = os.path.join(self.jsonnet_base, filename)
- logger.debug("Try: %s", path)
- with open(path, "rb") as f:
- logger.debug("Loaded: %s", path)
- return str(path), f.read()
-
-def config_object(items):
-
- return [
- { "name": v, "parameters": {} }
- for v in items
- ]
-
-def full_config_object(
- vector_store="qdrant", embeddings="embeddings-hf",
- graph_store="cassandra", llm="vertexai",
-):
-
- return config_object([
- "triple-store-" + graph_store,
- "pulsar",
- "vector-store-" + vector_store,
- embeddings,
- "graph-rag",
- "grafana",
- "trustgraph-base",
- llm,
- llm + "-rag",
- "workbench-ui",
- "prompt-template",
- "agent-manager-react",
- ])
-
-def generate_config(
- vector_store="qdrant", embeddings="embeddings-hf",
- graph_store="cassandra", llm="vertexai",
- platform="docker-compose",
- version="0.0.0"
-):
-
- config = full_config_object(
- vector_store=vector_store,
- embeddings=embeddings,
- graph_store=graph_store,
- llm=llm,
- )
-
- with open(f"./templates/config-to-{platform}.jsonnet", "r") as f:
- wrapper = f.read()
-
- gen = Generator(json.dumps(config).encode("utf-8"), version=version)
-
- processed = gen.process(wrapper)
-
- y = yaml.dump(processed)
-
- return y
-
-def generate_all(output, version):
-
- for platform in [
- "docker-compose",
- # "minikube-k8s", "gcp-k8s"
- ]:
- for model in [
- # "azure", "azure-openai", "bedrock", "claude", "cohere",
- # "googleaistudio", "llamafile", "mistral",
- "ollama",
- # "openai", "vertexai",
- ]:
- for graph in [
- "cassandra",
- # "neo4j", "falkordb"
- ]:
-
- y = generate_config(
- llm=model, graph_store=graph, platform=platform,
- version=version
- )
-
- fname = f"{platform}/tg-{model}-{graph}.yaml"
-
- output(fname, y)
-
-
-if len(sys.argv) < 3:
- raise RuntimeError("Usage: generate-all ")
-
-outfile = sys.argv[1]
-version = sys.argv[2]
-
-logger.info(f"Outputting to {outfile}...")
-
-with zipfile.ZipFile(outfile, mode='w') as out:
-
- def output(name, content):
- logger.info(f"Adding {name}...")
- out.writestr(name, content)
-
- generate_all(output=output, version=version)
-
- # Placeholder for the private.json file. Won't put actual credentials
- # here.
- output("docker-compose/vertexai/private.json", private_json)
-
- # Grafana config
- with open("grafana/dashboards/dashboard.json") as f:
- output("docker-compose/grafana/dashboards/dashboard.json", f.read())
-
- with open("grafana/provisioning/dashboard.yml") as f:
- output("docker-compose/grafana/provisioning/dashboard.yml", f.read())
-
- with open("grafana/provisioning/datasource.yml") as f:
- output("docker-compose/grafana/provisioning/datasource.yml", f.read())
-
- # Prometheus config
- with open("prometheus/prometheus.yml") as f:
- output("docker-compose/prometheus/prometheus.yml", f.read())
-
- # A README
- with open("templates/zip-readme.md") as f:
- output("README.md", f.read())
-
-logger.info("Output file written.")
-
diff --git a/templates/module-list.jsonnet b/templates/module-list.jsonnet
deleted file mode 100644
index b949021e..00000000
--- a/templates/module-list.jsonnet
+++ /dev/null
@@ -1,9 +0,0 @@
-
-local all = import "all-patterns.jsonnet";
-
-std.foldl(
- function(m, p) m + { [p.pattern.name]: p.module},
- all,
- {}
-)
-
diff --git a/templates/opts-to-docker-compose.jsonnet b/templates/opts-to-docker-compose.jsonnet
deleted file mode 100644
index c916475d..00000000
--- a/templates/opts-to-docker-compose.jsonnet
+++ /dev/null
@@ -1,21 +0,0 @@
-
-local engine = import "engine/docker-compose.jsonnet";
-local components = import "components.jsonnet";
-
-// Options
-local options = std.split(std.extVar("options"), ",");
-
-// Produce patterns from config
-local patterns = std.foldl(
- function(state, p) state + components[p],
- options,
- {}
-);
-
-// Extract resources usnig the engine
-local resources = engine.package(patterns);
-
-resources
-
-
-
diff --git a/templates/opts-to-gcp-k8s.jsonnet b/templates/opts-to-gcp-k8s.jsonnet
deleted file mode 100644
index a338cd9e..00000000
--- a/templates/opts-to-gcp-k8s.jsonnet
+++ /dev/null
@@ -1,19 +0,0 @@
-
-local engine = import "engine/docker-compose.jsonnet";
-local components = import "components.jsonnet";
-
-// Options
-local options = std.split(std.extVar("options"), ",");
-
-// Produce patterns from config
-local patterns = std.foldl(
- function(state, p) state + components[p],
- options,
- {}
-);
-
-// Extract resources usnig the engine
-local resources = engine.package(patterns);
-
-std.manifestYamlDoc(resources)
-
diff --git a/templates/opts-to-minikube-k8s.jsonnet b/templates/opts-to-minikube-k8s.jsonnet
deleted file mode 100644
index 15895909..00000000
--- a/templates/opts-to-minikube-k8s.jsonnet
+++ /dev/null
@@ -1,19 +0,0 @@
-
-local engine = import "engine/minikube-k8s.jsonnet";
-local components = import "components.jsonnet";
-
-// Options
-local options = std.split(std.extVar("options"), ",");
-
-// Produce patterns from config
-local patterns = std.foldl(
- function(state, p) state + components[p],
- options,
- {}
-);
-
-// Extract resources usnig the engine
-local resources = engine.package(patterns);
-
-resources
-
diff --git a/templates/patterns/document-rag.jsonnet b/templates/patterns/document-rag.jsonnet
deleted file mode 100644
index 631c14a5..00000000
--- a/templates/patterns/document-rag.jsonnet
+++ /dev/null
@@ -1,14 +0,0 @@
-{
- pattern: {
- name: "document-rag",
- icon: "🤝😂",
- title: "Add DocumentRAG processing flow",
- description: "This pattern adds DocumentRAG components for extracting and querying documents based on document embeddings. You should make sure a vector store is included in your plan.",
- requires: ["pulsar", "trustgraph", "llm"],
- features: ["document-rag"],
- args: [
- ],
- category: [ "processing" ],
- },
- module: "components/document-rag.jsonnet",
-}
diff --git a/templates/patterns/embeddings-hf.jsonnet b/templates/patterns/embeddings-hf.jsonnet
deleted file mode 100644
index 0235dbd1..00000000
--- a/templates/patterns/embeddings-hf.jsonnet
+++ /dev/null
@@ -1,26 +0,0 @@
-{
- pattern: {
- name: "embeddings-hf",
- icon: "🤖💬",
- title: "Add embeddings model which uses HuggingFace models",
- description: "This pattern integrates an embeddings model based on HuggingFace sentence-transformer library.",
- requires: ["pulsar", "trustgraph"],
- features: ["llm"],
- args: [
- {
- name: "embeddings-model",
- label: "Embeddings model",
- type: "select",
- description: "Embeddings model for sentence analysis",
- options: [
- { id: "all-MiniLM-L6-v2", description: "all-MiniLM-L6-v2" },
- { id: "mixedbread-ai/mxbai-embed-large-v1", description: "mxbai-embed-large-v1" },
- ],
- default: "all-MiniLM-L6-v2",
- required: true,
- },
- ],
- category: [ "embeddings" ],
- },
- module: "components/embeddings-hf.jsonnet",
-}
diff --git a/templates/patterns/embeddings-ollama.jsonnet b/templates/patterns/embeddings-ollama.jsonnet
deleted file mode 100644
index c3ed70a2..00000000
--- a/templates/patterns/embeddings-ollama.jsonnet
+++ /dev/null
@@ -1,34 +0,0 @@
-{
- pattern: {
- name: "embeddings-ollama",
- icon: "🤖💬",
- title: "Add embeddings model hosted on Ollama",
- description: "This pattern integrates an embeddings model based on HuggingFace sentence-transformer library.",
- requires: ["pulsar", "trustgraph"],
- features: ["llm"],
- args: [
- {
- name: "embeddings-model",
- label: "Embeddings model",
- type: "select",
- description: "Embeddings model for sentence analysis",
- options: [
- { id: "mxbai-embed-large", description: "mxbai-embed-large" },
- ],
- default: "mxbai-embed-large",
- required: true,
- },
- {
- name: "ollama-url",
- label: "URL",
- type: "text",
- width: 120,
- description: "URL of the Ollama service",
- default: "http://ollama:11434",
- required: true,
- },
- ],
- category: [ "embeddings" ],
- },
- module: "components/embeddings-hf.jsonnet",
-}
diff --git a/templates/patterns/grafana.jsonnet b/templates/patterns/grafana.jsonnet
deleted file mode 100644
index 49ae692c..00000000
--- a/templates/patterns/grafana.jsonnet
+++ /dev/null
@@ -1,14 +0,0 @@
-{
- pattern: {
- name: "grafana",
- icon: "📈🧯",
- title: "Add Prometheus and Grafana for monitoring and dashboards",
- description: "System monitoring and dashboarding using Grafana and Prometheus",
- requires: ["pulsar", "trustgraph"],
- features: ["prometheus", "grafana"],
- args: [
- ],
- category: ["monitoring"],
- },
- module: "components/grafana.jsonnet",
-}
diff --git a/templates/patterns/graph-rag.jsonnet b/templates/patterns/graph-rag.jsonnet
deleted file mode 100644
index 5f598006..00000000
--- a/templates/patterns/graph-rag.jsonnet
+++ /dev/null
@@ -1,38 +0,0 @@
-{
- pattern: {
- name: "graph-rag",
- icon: "🤝😂",
- title: "Add GraphRAG processing flow",
- description: "This pattern adds GraphRAG components for extracting and querying graph edges. You should make sure a triple store and vector store are included in your plan.",
- requires: ["pulsar", "trustgraph", "llm"],
- features: ["graph-rag"],
- args: [
- {
- name: "graph-rag-entity-limit",
- label: "GraphRAG entity query limit",
- type: "integer",
- description: "Limit on entities to fetch from vector store",
- default: 50,
- required: true,
- },
- {
- name: "graph-rag-triple-limit",
- label: "GraphRAG triple query limit",
- type: "integer",
- description: "Limit on triples to fetch from triple store",
- default: 30,
- required: true,
- },
- {
- name: "graph-rag-max-subgraph-size",
- label: "GraphRAG maximum subgraph size",
- type: "integer",
- description: "Limit on size of subgraph to present to text-completion model",
- default: 3000,
- required: true,
- },
- ],
- category: [ "processing" ],
- },
- module: "components/trustgraph.jsonnet",
-}
diff --git a/templates/patterns/llm-azure-openai.jsonnet b/templates/patterns/llm-azure-openai.jsonnet
deleted file mode 100644
index 06e1a3f5..00000000
--- a/templates/patterns/llm-azure-openai.jsonnet
+++ /dev/null
@@ -1,32 +0,0 @@
-{
- pattern: {
- name: "azure-openai",
- icon: "🤖💬",
- title: "Add Azure OpenAI LLM endpoint for text completion",
- description: "This pattern integrates an Azure OpenAI LLM endpoint hosted in the Azure cloud for text completion operations. You need an Azure subscription to be able to use this service.",
- requires: ["pulsar", "trustgraph"],
- features: ["llm"],
- args: [
- {
- name: "azure-openai-max-output-tokens",
- label: "Maximum output tokens",
- type: "integer",
- description: "Limit on number tokens to generate",
- default: 4096,
- required: true,
- },
- {
- name: "azure-openai-temperature",
- label: "Temperature",
- type: "slider",
- description: "Controlling predictability / creativity balance",
- min: 0,
- max: 1,
- step: 0.05,
- default: 0.5,
- },
- ],
- category: [ "llm" ],
- },
- module: "components/azure.jsonnet",
-}
diff --git a/templates/patterns/llm-azure.jsonnet b/templates/patterns/llm-azure.jsonnet
deleted file mode 100644
index 7a0f12ee..00000000
--- a/templates/patterns/llm-azure.jsonnet
+++ /dev/null
@@ -1,32 +0,0 @@
-{
- pattern: {
- name: "azure",
- icon: "🤖💬",
- title: "Add Azure LLM endpoint for text completion",
- description: "This pattern integrates an Azure LLM endpoint hosted in the Azure cloud for text completion operations. You need an Azure subscription and to have an endpoint deployed to be able to use this service.",
- requires: ["pulsar", "trustgraph"],
- features: ["llm"],
- args: [
- {
- name: "azure-max-output-tokens",
- label: "Maximum output tokens",
- type: "integer",
- description: "Limit on number tokens to generate",
- default: 4096,
- required: true,
- },
- {
- name: "azure-temperature",
- label: "Temperature",
- type: "slider",
- description: "Controlling predictability / creativity balance",
- min: 0,
- max: 1,
- step: 0.05,
- default: 0.5,
- },
- ],
- category: [ "llm" ],
- },
- module: "components/azure.jsonnet",
-}
diff --git a/templates/patterns/llm-bedrock.jsonnet b/templates/patterns/llm-bedrock.jsonnet
deleted file mode 100644
index e114c428..00000000
--- a/templates/patterns/llm-bedrock.jsonnet
+++ /dev/null
@@ -1,32 +0,0 @@
-{
- pattern: {
- name: "bedrock",
- icon: "🤖💬",
- title: "Add AWS Bedrock for text completion",
- description: "This pattern integrates an AWS Bedrock LLM service hosted in the AWS cloud for text completion operations. You need an AWS cloud subscription and to have Bedrock configured to be able to use this service.",
- requires: ["pulsar", "trustgraph"],
- features: ["llm"],
- args: [
- {
- name: "bedrock-max-output-tokens",
- label: "Maximum output tokens",
- type: "integer",
- description: "Limit on number tokens to generate",
- default: 4096,
- required: true,
- },
- {
- name: "bedrock-temperature",
- label: "Temperature",
- type: "slider",
- description: "Controlling predictability / creativity balance",
- min: 0,
- max: 1,
- step: 0.05,
- default: 0.5,
- },
- ],
- category: [ "llm" ],
- },
- module: "components/bedrock.jsonnet",
-}
diff --git a/templates/patterns/llm-claude.jsonnet b/templates/patterns/llm-claude.jsonnet
deleted file mode 100644
index beefef4b..00000000
--- a/templates/patterns/llm-claude.jsonnet
+++ /dev/null
@@ -1,32 +0,0 @@
-{
- pattern: {
- name: "claude",
- icon: "🤖💬",
- title: "Add Anthropic Claude for text completion",
- description: "This pattern integrates an Anthropic Claude LLM service for text completion operations. You need a Claude subscription to be able to use this service.",
- requires: ["pulsar", "trustgraph"],
- features: ["llm"],
- args: [
- {
- name: "claude-max-output-tokens",
- label: "Maximum output tokens",
- type: "integer",
- description: "Limit on number tokens to generate",
- default: 4096,
- required: true,
- },
- {
- name: "claude-temperature",
- label: "Temperature",
- type: "slider",
- description: "Controlling predictability / creativity balance",
- min: 0,
- max: 1,
- step: 0.05,
- default: 0.5,
- },
- ],
- category: [ "llm" ],
- },
- module: "components/claude.jsonnet",
-}
diff --git a/templates/patterns/llm-cohere.jsonnet b/templates/patterns/llm-cohere.jsonnet
deleted file mode 100644
index 33a070e3..00000000
--- a/templates/patterns/llm-cohere.jsonnet
+++ /dev/null
@@ -1,32 +0,0 @@
-{
- pattern: {
- name: "cohere",
- icon: "🤖💬",
- title: "Add Cohere LLM endpoint for text completion",
- description: "This pattern integrates the Cohere LLM service for text completion operations. You need a Cohere subscription and API keys to be able to use this service.",
- requires: ["pulsar", "trustgraph"],
- features: ["llm"],
- args: [
- {
- name: "cohere-max-output-tokens",
- label: "Maximum output tokens",
- type: "integer",
- description: "Limit on number tokens to generate",
- default: 4096,
- required: true,
- },
- {
- name: "cohere-temperature",
- label: "Temperature",
- type: "slider",
- description: "Controlling predictability / creativity balance",
- min: 0,
- max: 1,
- step: 0.05,
- default: 0.5,
- },
- ],
- category: [ "llm" ],
- },
- module: "components/cohere.jsonnet",
-}
diff --git a/templates/patterns/llm-googleaistudio.jsonnet b/templates/patterns/llm-googleaistudio.jsonnet
deleted file mode 100644
index aa56d347..00000000
--- a/templates/patterns/llm-googleaistudio.jsonnet
+++ /dev/null
@@ -1,32 +0,0 @@
-{
- pattern: {
- name: "googleaistudio",
- icon: "🤖💬",
- title: "Add GoogleAIStudio for text completion",
- description: "This pattern integrates a GoogleAIStudio LLM service for text completion operations. You need a GoogleAISTudio API key to be able to use this service.",
- requires: ["pulsar", "trustgraph"],
- features: ["llm"],
- args: [
- {
- name: "googleaistudio-max-output-tokens",
- label: "Maximum output tokens",
- type: "integer",
- description: "Limit on number tokens to generate",
- default: 4096,
- required: true,
- },
- {
- name: "googleaistudio-temperature",
- label: "Temperature",
- type: "slider",
- description: "Controlling predictability / creativity balance",
- min: 0,
- max: 1,
- step: 0.05,
- default: 0.5,
- },
- ],
- category: [ "llm" ],
- },
- module: "components/googleaistudio.jsonnet",
-}
diff --git a/templates/patterns/llm-llamafile.jsonnet b/templates/patterns/llm-llamafile.jsonnet
deleted file mode 100644
index de2d95ef..00000000
--- a/templates/patterns/llm-llamafile.jsonnet
+++ /dev/null
@@ -1,41 +0,0 @@
-{
- pattern: {
- name: "llamafile",
- icon: "🤖💬",
- title: "Add Llamafile-invoked LLMs for text completion",
- description: "This pattern integrates a Llamafile service for text completion operations. You need to have a running Llamafile implementation executing the necessary model in order to be able to use this service.",
- requires: ["pulsar", "trustgraph"],
- features: ["llm"],
- args: [
- {
- name: "llamafile-max-output-tokens",
- label: "Maximum output tokens",
- type: "integer",
- description: "Limit on number tokens to generate",
- default: 4096,
- required: true,
- },
- {
- name: "llamafile-temperature",
- label: "Temperature",
- type: "slider",
- description: "Controlling predictability / creativity balance",
- min: 0,
- max: 1,
- step: 0.05,
- default: 0.5,
- },
- {
- name: "llamafile-url",
- label: "URL",
- type: "text",
- width: 120,
- description: "URL of the Llamafile service",
- default: "http://llamafile:8080",
- required: true,
- },
- ],
- category: [ "llm" ],
- },
- module: "components/llamafile.jsonnet",
-}
diff --git a/templates/patterns/llm-mistral.jsonnet b/templates/patterns/llm-mistral.jsonnet
deleted file mode 100644
index 11f6de22..00000000
--- a/templates/patterns/llm-mistral.jsonnet
+++ /dev/null
@@ -1,32 +0,0 @@
-{
- pattern: {
- name: "mistral",
- icon: "🤖💬",
- title: "Add Mistral LLM endpoint for text completion",
- description: "This pattern integrates a Mistral LLM service for text completion operations. You need a Mistral subscription and have an API key to be able to use this service.",
- requires: ["pulsar", "trustgraph"],
- features: ["llm"],
- args: [
- {
- name: "mistral-max-output-tokens",
- label: "Maximum output tokens",
- type: "integer",
- description: "Limit on number tokens to generate",
- default: 4096,
- required: true,
- },
- {
- name: "mistral-temperature",
- label: "Temperature",
- type: "slider",
- description: "Controlling predictability / creativity balance",
- min: 0,
- max: 1,
- step: 0.05,
- default: 0.5,
- },
- ],
- category: [ "llm" ],
- },
- module: "components/mistral.jsonnet",
-}
diff --git a/templates/patterns/llm-ollama.jsonnet b/templates/patterns/llm-ollama.jsonnet
deleted file mode 100644
index ea82bbc7..00000000
--- a/templates/patterns/llm-ollama.jsonnet
+++ /dev/null
@@ -1,41 +0,0 @@
-{
- pattern: {
- name: "ollama",
- icon: "🤖💬",
- title: "Add Ollama LLM for text completion",
- description: "This pattern integrates an Ollama service for text completion operations. You need to have a running Ollama service with the necessary models added in order to be able to use this service.",
- requires: ["pulsar", "trustgraph"],
- features: ["llm"],
- args: [
- {
- name: "ollama-max-output-tokens",
- label: "Maximum output tokens",
- type: "integer",
- description: "Limit on number tokens to generate",
- default: 4096,
- required: true,
- },
- {
- name: "ollama-temperature",
- label: "Temperature",
- type: "slider",
- description: "Controlling predictability / creativity balance",
- min: 0,
- max: 1,
- step: 0.05,
- default: 0.5,
- },
- {
- name: "ollama-url",
- label: "URL",
- type: "text",
- width: 120,
- description: "URL of the Ollama service",
- default: "http://ollama:11434",
- required: true,
- },
- ],
- category: [ "llm" ],
- },
- module: "components/ollama.jsonnet",
-}
diff --git a/templates/patterns/llm-openai.jsonnet b/templates/patterns/llm-openai.jsonnet
deleted file mode 100644
index fc1bbd51..00000000
--- a/templates/patterns/llm-openai.jsonnet
+++ /dev/null
@@ -1,32 +0,0 @@
-{
- pattern: {
- name: "openai",
- icon: "🤖💬",
- title: "Add OpenAI LLM endpoint for text completion",
- description: "This pattern integrates an OpenAI LLM service for text completion operations. You need an OpenAI subscription and have an API key to be able to use this service.",
- requires: ["pulsar", "trustgraph"],
- features: ["llm"],
- args: [
- {
- name: "openai-max-output-tokens",
- label: "Maximum output tokens",
- type: "integer",
- description: "Limit on number tokens to generate",
- default: 4096,
- required: true,
- },
- {
- name: "openai-temperature",
- label: "Temperature",
- type: "slider",
- description: "Controlling predictability / creativity balance",
- min: 0,
- max: 1,
- step: 0.05,
- default: 0.5,
- },
- ],
- category: [ "llm" ],
- },
- module: "components/openai.jsonnet",
-}
diff --git a/templates/patterns/llm-vertexai.jsonnet b/templates/patterns/llm-vertexai.jsonnet
deleted file mode 100644
index 040aa9a1..00000000
--- a/templates/patterns/llm-vertexai.jsonnet
+++ /dev/null
@@ -1,32 +0,0 @@
-{
- pattern: {
- name: "vertexai",
- icon: "🤖💬",
- title: "Add Google Cloud VertexAI LLM for text completion",
- description: "This pattern integrates a VertexAI endpoint hosted in Google Cloud for text completion operations. You need a GCP subscription and to have VertexAI enabled to be able to use this service.",
- requires: ["pulsar", "trustgraph"],
- features: ["llm"],
- args: [
- {
- name: "vertexai-max-output-tokens",
- label: "Maximum output tokens",
- type: "integer",
- description: "Limit on number tokens to generate",
- default: 4096,
- required: true,
- },
- {
- name: "vertexai-temperature",
- label: "Temperature",
- type: "slider",
- description: "Controlling predictability / creativity balance",
- min: 0,
- max: 1,
- step: 0.05,
- default: 0.5,
- },
- ],
- category: [ "llm" ],
- },
- module: "components/vertexai.jsonnet",
-}
diff --git a/templates/patterns/override-recursive-chunker.jsonnet b/templates/patterns/override-recursive-chunker.jsonnet
deleted file mode 100644
index 65f93947..00000000
--- a/templates/patterns/override-recursive-chunker.jsonnet
+++ /dev/null
@@ -1,28 +0,0 @@
-{
- pattern: {
- name: "override-recursive-chunker",
- icon: "✂️🪚",
- title: "Replace default chunker with recursive chunker",
- description: "The default chunker used in Trustgraph core is a token-based chunker. This pattern replaces that with a recursive chunker, and allows ou to configure the chunking parameters.",
- requires: ["pulsar", "trustgraph"],
- features: [],
- args: [
- {
- name: "chunk-size",
- type: "integer",
- description: "Chunk size value",
- default: 2000,
- required: true,
- },
- {
- name: "chunk-overlap",
- type: "integer",
- description: "Overlap size value",
- default: 100,
- required: true,
- }
- ],
- category: [ "chunking" ],
- },
- module: "components/cassandra.jsonnet",
-}
diff --git a/templates/patterns/prompt-template-definitions.jsonnet b/templates/patterns/prompt-template-definitions.jsonnet
deleted file mode 100644
index 8dc89465..00000000
--- a/templates/patterns/prompt-template-definitions.jsonnet
+++ /dev/null
@@ -1,23 +0,0 @@
-{
- pattern: {
- name: "prompt-template-definitions",
- icon: "📜️️💬",
- title: "Override definition extraction prompt",
- description: "This pattern overrides the default definition extraction LLM prompt allowing you to provide your own prompt.",
- requires: ["pulsar", "trustgraph"],
- features: ["extract-definition-prompt"],
- args: [
- {
- name: "prompt-definition-template",
- type: "multiline",
- size: 2000,
- rows: 10,
- description: "Definition extraction prompt",
- default: "\nStudy the following text and derive definitions for any discovered entities.\nDo not provide definitions for entities whose definitions are incomplete\nor unknown.\nOutput relationships in JSON format as an arary of objects with fields:\n- entity: the name of the entity\n- definition: English text which defines the entity\n\n\n\n{text}\n\n\n\nYou will respond only with raw JSON format data. Do not provide\nexplanations. Do not use special characters in the abstract text. The\nabstract will be written as plain text. Do not add markdown formatting\nor headers or prefixes. Do not include null or unknown definitions.\n",
- required: true,
- }
- ],
- category: [ "prompting" ],
- },
- module: "components/null.jsonnet",
-}
diff --git a/templates/patterns/prompt-template-document-query.jsonnet b/templates/patterns/prompt-template-document-query.jsonnet
deleted file mode 100644
index f445870a..00000000
--- a/templates/patterns/prompt-template-document-query.jsonnet
+++ /dev/null
@@ -1,23 +0,0 @@
-{
- pattern: {
- name: "prompt-template-document-query",
- icon: "📜️️💬",
- title: "Override document query prompt",
- description: "This pattern overrides the default document query prompt used for DocumentRAG allowing you to specify your own prompt.",
- requires: ["pulsar", "trustgraph"],
- features: ["document-query-prompt"],
- args: [
- {
- name: "prompt-document-query-template",
- type: "multiline",
- size: 2000,
- rows: 10,
- description: "Document query prompt",
- default: "Study the following context. Use only the information provided in the context in your response. Do not speculate if the answer is not found in the provided set of knowledge statements.\n\nHere is the context:\n{documents}\n\nUse only the provided knowledge statements to respond to the following:\n{query}\n",
- required: true,
- }
- ],
- category: [ "prompting" ],
- },
- module: "components/null.jsonnet",
-}
diff --git a/templates/patterns/prompt-template-kg-query.jsonnet b/templates/patterns/prompt-template-kg-query.jsonnet
deleted file mode 100644
index c0a256a4..00000000
--- a/templates/patterns/prompt-template-kg-query.jsonnet
+++ /dev/null
@@ -1,23 +0,0 @@
-{
- pattern: {
- name: "prompt-template-kq-query",
- icon: "📜️️💬",
- title: "Override knowledge query prompt",
- description: "This pattern overrides the default knowledge query LLM prompt allowing you to provide your own prompt.",
- requires: ["pulsar", "trustgraph"],
- features: ["kg-query-prompt"],
- args: [
- {
- name: "prompt-knowledge-query-template",
- type: "multiline",
- size: 2000,
- rows: 10,
- description: "Knowledge graph extraction prompt",
- default: "Study the following set of knowledge statements. The statements are written in Cypher format that has been extracted from a knowledge graph. Use only the provided set of knowledge statements in your response. Do not speculate if the answer is not found in the provided set of knowledge statements.\n\nHere's the knowledge statements:\n{graph}\n\nUse only the provided knowledge statements to respond to the following:\n{query}\n",
- required: true,
- }
- ],
- category: [ "prompting" ],
- },
- module: "components/null.jsonnet",
-}
diff --git a/templates/patterns/prompt-template-relationships.jsonnet b/templates/patterns/prompt-template-relationships.jsonnet
deleted file mode 100644
index 1c295ca9..00000000
--- a/templates/patterns/prompt-template-relationships.jsonnet
+++ /dev/null
@@ -1,23 +0,0 @@
-{
- pattern: {
- name: "prompt-template-relationships",
- icon: "📜️️💬",
- title: "Override relationship extraction prompt",
- description: "This pattern overrides the default relationship extraction LLM prompt allowing you to provide your own prompt.",
- requires: ["pulsar", "trustgraph"],
- features: ["extract-relationship-prompt"],
- args: [
- {
- name: "prompt-relationship-template",
- type: "multiline",
- size: 2000,
- rows: 10,
- description: "Relationship extraction prompt",
- default: "\nStudy the following text and derive entity relationships. For each\nrelationship, derive the subject, predicate and object of the relationship.\nOutput relationships in JSON format as an arary of objects with fields:\n- subject: the subject of the relationship\n- predicate: the predicate\n- object: the object of the relationship\n- object-entity: false if the object is a simple data type: name, value or date. true if it is an entity.\n\n\n\n{text}\n\n\n\nYou will respond only with raw JSON format data. Do not provide\nexplanations. Do not use special characters in the abstract text. The\nabstract must be written as plain text. Do not add markdown formatting\nor headers or prefixes.\n",
- required: true,
- }
- ],
- category: [ "prompting" ],
- },
- module: "components/null.jsonnet",
-}
diff --git a/templates/patterns/prompt-template-rows-template.jsonnet b/templates/patterns/prompt-template-rows-template.jsonnet
deleted file mode 100644
index 09040aa1..00000000
--- a/templates/patterns/prompt-template-rows-template.jsonnet
+++ /dev/null
@@ -1,23 +0,0 @@
-{
- pattern: {
- name: "prompt-template-rows-template",
- icon: "📜️️💬",
- title: "Override table/row extraction prompt",
- description: "This pattern overrides the default table/row extraction prompt to be changed. This is used for DatabaseRAG pipelines.",
- requires: ["pulsar", "trustgraph"],
- features: ["extract-rows-prompt"],
- args: [
- {
- name: "prompt-rows-template",
- type: "multiline",
- size: 2000,
- rows: 10,
- description: "Row data extraction prompt",
- default: "\nStudy the following text and derive objects which match the schema provided.\n\nYou must output an array of JSON objects for each object you discover\nwhich matches the schema. For each object, output a JSON object whose fields\ncarry the name field specified in the schema.\n\n\n\n{schema}\n\n\n\n{text}\n\n\n\nYou will respond only with raw JSON format data. Do not provide\nexplanations. Do not add markdown formatting or headers or prefixes.\n",
- required: true,
- }
- ],
- category: [ "prompting" ],
- },
- module: "components/null.jsonnet",
-}
diff --git a/templates/patterns/pulsar-manager.jsonnet b/templates/patterns/pulsar-manager.jsonnet
deleted file mode 100644
index 81fa6bde..00000000
--- a/templates/patterns/pulsar-manager.jsonnet
+++ /dev/null
@@ -1,23 +0,0 @@
-{
- pattern: {
- name: "pulsar-manager",
- icon: "🏻🛃",
- title: "Add Pulsar Manager",
- description: "Adds Pulsar Manager which provides a web interface to manage Pulsar. Pulsar Manager is a large container and deployment requiring over 1GB of RAM, so is not deployed by default. This is not a required component, it may be useful to help manage a large operational deployment.",
- requires: ["pulsar"],
- features: ["pulsar-manager"],
- args: [
- {
- name: "initial-admin-password",
- label: "Password",
- type: "text",
- width: 40,
- description: "Admin password to apply at initialisation",
- default: "pulsaradmin",
- required: true,
- },
- ],
- category: [ "foundation" ],
- },
- module: "components/pulsar.jsonnet",
-}
diff --git a/templates/patterns/pulsar.jsonnet b/templates/patterns/pulsar.jsonnet
deleted file mode 100644
index 4b1a1205..00000000
--- a/templates/patterns/pulsar.jsonnet
+++ /dev/null
@@ -1,14 +0,0 @@
-{
- pattern: {
- name: "pulsar",
- icon: "🌟☄️",
- title: "Deploy foundation messaging fabric",
- description: "Deploy Pulsar as the inter-process messaging fabric. Pulsar is a Cloud-native, distributed messaging and Streaming. Apache Pulsar is an open-source, distributed messaging and streaming platform built for the cloud. Trustgraph uses Pulsar to manage the message flow between all components.",
- requires: [],
- features: ["pulsar"],
- args: [
- ],
- category: [ "foundation" ],
- },
- module: "components/pulsar.jsonnet",
-}
diff --git a/templates/patterns/triple-store-cassandra.jsonnet b/templates/patterns/triple-store-cassandra.jsonnet
deleted file mode 100644
index b386b2f5..00000000
--- a/templates/patterns/triple-store-cassandra.jsonnet
+++ /dev/null
@@ -1,13 +0,0 @@
-{
- pattern: {
- name: "triple-store-cassandra",
- icon: "🖇️🙋♀️",
- title: "Adds a Cassandra store configured to act as a triple store",
- description: "GraphRAG processing needs a triple store. This pattern adds a Cassandra store, along with plumbing so that Cassandra is integrated with GraphRag indexing and querying.",
- requires: ["pulsar", "trustgraph"],
- features: ["cassandra", "triple-store"],
- args: [],
- category: ["knowledge-graph"],
- },
- module: "components/cassandra.jsonnet",
-}
diff --git a/templates/patterns/triple-store-falkordb.jsonnet b/templates/patterns/triple-store-falkordb.jsonnet
deleted file mode 100644
index 40ef02e2..00000000
--- a/templates/patterns/triple-store-falkordb.jsonnet
+++ /dev/null
@@ -1,13 +0,0 @@
-{
- pattern: {
- name: "triple-store-falkordb",
- icon: "🖇️🙋♀️",
- title: "Adds a FalkorDB store configured to act as a triple store.",
- description: "GraphRAG processing needs a triple store. This pattern adds a FalkorDB store, along with plumbing so that FalkorDB is integrated with GraphRag indexing and querying.",
- requires: ["pulsar", "trustgraph"],
- features: ["falkordb", "triple-store"],
- args: [],
- category: [ "knowledge-graph" ],
- },
- module: "components/falkordb.jsonnet",
-}
diff --git a/templates/patterns/triple-store-neo4j.jsonnet b/templates/patterns/triple-store-neo4j.jsonnet
deleted file mode 100644
index b111ebe3..00000000
--- a/templates/patterns/triple-store-neo4j.jsonnet
+++ /dev/null
@@ -1,13 +0,0 @@
-{
- pattern: {
- name: "triple-store-neo4j",
- icon: "🖇️🙋♀️",
- title: "Adds a Neo4j store configured to act as a triple store.",
- description: "GraphRAG processing needs a triple store. This pattern adds a Neo4j store, along with plumbing so that Neo4j is integrated with GraphRag indexing and querying.",
- requires: ["pulsar", "trustgraph"],
- features: ["neo4j", "triple-store"],
- args: [],
- category: [ "knowledge-graph" ],
- },
- module: "components/neo4j.jsonnet",
-}
diff --git a/templates/patterns/trustgraph-base.jsonnet b/templates/patterns/trustgraph-base.jsonnet
deleted file mode 100644
index c7e912f5..00000000
--- a/templates/patterns/trustgraph-base.jsonnet
+++ /dev/null
@@ -1,14 +0,0 @@
-{
- pattern: {
- name: "trustgraph-base",
- icon: "🤝😂",
- title: "Add Trustgraph base processing flows",
- description: "This pattern adds a core set of Trustgraph flows, including PDF ingest, chunking, embeddings, and knowledge graph extraction. You should also consider adding an LLM and at least one RAG processing flow.",
- requires: ["pulsar"],
- features: ["trustgraph"],
- args: [
- ],
- category: [ "foundation" ],
- },
- module: "components/trustgraph.jsonnet",
-}
diff --git a/templates/patterns/vector-store-milvus.jsonnet b/templates/patterns/vector-store-milvus.jsonnet
deleted file mode 100644
index edbb4e57..00000000
--- a/templates/patterns/vector-store-milvus.jsonnet
+++ /dev/null
@@ -1,15 +0,0 @@
-{
- pattern: {
- name: "vector-store-milvus",
- icon: "❓🌐",
- title: "Add Milvus, a vector embeddings store",
- description: "The Trustgraph core does not include a vector store by default. This configuration pattern adds a simple Milvus store and integrates with embeddings handling.",
- requires: ["pulsar", "trustgraph"],
- features: ["milvus", "vectordb"],
- args: [
- ],
- category: [ "vector-store" ],
- },
- module: "components/milvus.jsonnet",
-}
-
diff --git a/templates/patterns/vector-store-qdrant.jsonnet b/templates/patterns/vector-store-qdrant.jsonnet
deleted file mode 100644
index 8d949f4b..00000000
--- a/templates/patterns/vector-store-qdrant.jsonnet
+++ /dev/null
@@ -1,14 +0,0 @@
-{
- pattern: {
- name: "vector-store-qdrant",
- icon: "❓🌐",
- title: "Adds Qdrant, a vector embeddings store",
- description: "The Trustgraph core does not include a vector store by default. This configuration pattern adds a simple Qdrant store and integrates with embeddings handling.",
- requires: ["pulsar", "trustgraph"],
- features: ["qdrant", "vectordb"],
- args: [
- ],
- category: [ "vector-store" ],
- },
- module: "components/qdrant.jsonnet",
-}
diff --git a/templates/prompts/cohere.jsonnet b/templates/prompts/cohere.jsonnet
deleted file mode 100644
index 9541e4c2..00000000
--- a/templates/prompts/cohere.jsonnet
+++ /dev/null
@@ -1,42 +0,0 @@
-// For Cohere. Not currently overriding prompts
-
-local prompts = import "default-prompts.jsonnet";
-
-prompts + {
-
- // "system-template":: "PROMPT GOES HERE.",
-
- "templates" +:: {
-
- "question" +:: {
- // "prompt": "PROMPT GOES HERE",
- },
-
- "extract-definitions" +:: {
- // "prompt": "PROMPT GOES HERE",
- },
-
- "extract-relationships" +:: {
- // "prompt": "PROMPT GOES HERE",
- },
-
- "extract-topics" +:: {
- // "prompt": "PROMPT GOES HERE",
- },
-
- "extract-rows" +:: {
- // "prompt": "PROMPT GOES HERE",
- },
-
- "kg-prompt" +:: {
- // "prompt": "PROMPT GOES HERE",
- },
-
- "document-prompt" +:: {
- // "prompt": "PROMPT GOES HERE",
- },
-
- }
-
-}
-
diff --git a/templates/prompts/default-prompts.jsonnet b/templates/prompts/default-prompts.jsonnet
deleted file mode 100644
index be05b992..00000000
--- a/templates/prompts/default-prompts.jsonnet
+++ /dev/null
@@ -1,114 +0,0 @@
-
-// Prompt templates. For tidy JSONNET use, don't change these templates
-// here, but use over-rides in the prompt directory
-
-{
-
- "system-template":: "You are a helpful assistant.",
-
- "templates":: {
-
- "question":: {
- "prompt": "{{question}}",
- },
-
- "extract-definitions":: {
- "prompt": "\nStudy the following text and derive definitions for any discovered entities.\nDo not provide definitions for entities whose definitions are incomplete\nor unknown.\nOutput relationships in JSON format as an arary of objects with fields:\n- entity: the name of the entity\n- definition: English text which defines the entity\n\n\n\n{{text}}\n\n\n\nYou will respond only with raw JSON format data. Do not provide\nexplanations. Do not use special characters in the abstract text. The\nabstract will be written as plain text. Do not add markdown formatting\nor headers or prefixes. Do not include null or unknown definitions.\n",
- "response-type": "json",
- "schema": {
- "type": "array",
- "items": {
- "type": "object",
- "properties": {
- "entity": {
- "type": "string"
- },
- "definition": {
- "type": "string"
- }
- },
- "required": [
- "entity",
- "definition"
- ]
- }
- }
- },
-
- "extract-relationships":: {
- "prompt": "\nStudy the following text and derive entity relationships. For each\nrelationship, derive the subject, predicate and object of the relationship.\nOutput relationships in JSON format as an arary of objects with fields:\n- subject: the subject of the relationship\n- predicate: the predicate\n- object: the object of the relationship\n- object-entity: false if the object is a simple data type: name, value or date. true if it is an entity.\n\n\n\n{{text}}\n\n\n\nYou will respond only with raw JSON format data. Do not provide\nexplanations. Do not use special characters in the abstract text. The\nabstract must be written as plain text. Do not add markdown formatting\nor headers or prefixes.\n",
- "response-type": "json",
- "schema": {
- "type": "array",
- "items": {
- "type": "object",
- "properties": {
- "subject": {
- "type": "string"
- },
- "predicate": {
- "type": "string"
- },
- "object": {
- "type": "string"
- },
- "object-entity": {
- "type": "boolean"
- },
- },
- "required": [
- "subject",
- "predicate",
- "object",
- "object-entity"
- ]
- }
- }
- },
-
- "extract-topics":: {
- "prompt": "You are a helpful assistant that performs information extraction tasks for a provided text.\nRead the provided text. You will identify topics and their definitions in JSON.\n\nReading Instructions:\n- Ignore document formatting in the provided text.\n- Study the provided text carefully.\n\nHere is the text:\n{{text}}\n\nResponse Instructions: \n- Do not respond with special characters.\n- Return only topics that are concepts and unique to the provided text.\n- Respond only with well-formed JSON.\n- The JSON response shall be an array of objects with keys \"topic\" and \"definition\". \n- The JSON response shall use the following structure:\n\n```json\n[{\"topic\": string, \"definition\": string}]\n```\n\n- Do not write any additional text or explanations.",
- "response-type": "json",
- "schema": {
- "type": "array",
- "items": {
- "type": "object",
- "properties": {
- "topic": {
- "type": "string"
- },
- "definition": {
- "type": "string"
- }
- },
- "required": [
- "topic",
- "definition"
- ]
- }
- }
- },
-
- "extract-rows":: {
- "prompt": "\nStudy the following text and derive objects which match the schema provided.\n\nYou must output an array of JSON objects for each object you discover\nwhich matches the schema. For each object, output a JSON object whose fields\ncarry the name field specified in the schema.\n\n\n\n{{schema}}\n\n\n\n{{text}}\n\n\n\nYou will respond only with raw JSON format data. Do not provide\nexplanations. Do not add markdown formatting or headers or prefixes.\n",
- "response-type": "json",
- },
-
- "kg-prompt":: {
- "prompt": "Study the following set of knowledge statements. The statements are written in Cypher format that has been extracted from a knowledge graph. Use only the provided set of knowledge statements in your response. Do not speculate if the answer is not found in the provided set of knowledge statements.\n\nHere's the knowledge statements:\n{% for edge in knowledge %}({{edge.s}})-[{{edge.p}}]->({{edge.o}})\n{%endfor%}\n\nUse only the provided knowledge statements to respond to the following:\n{{query}}\n",
- "response-type": "text",
- },
-
- "document-prompt":: {
- "prompt": "Study the following context. Use only the information provided in the context in your response. Do not speculate if the answer is not found in the provided set of knowledge statements.\n\nHere is the context:\n{{documents}}\n\nUse only the provided knowledge statements to respond to the following:\n{{query}}\n",
- "response-type": "text",
- },
-
- "agent-react":: {
- "prompt": "Answer the following questions as best you can. You have\naccess to the following functions:\n\n{% for tool in tools %}{\n \"function\": \"{{ tool.name }}\",\n \"description\": \"{{ tool.description }}\",\n \"arguments\": [\n{% for arg in tool.arguments %} {\n \"name\": \"{{ arg.name }}\",\n \"type\": \"{{ arg.type }}\",\n \"description\": \"{{ arg.description }}\",\n }\n{% endfor %}\n ]\n}\n{% endfor %}\n\nYou can either choose to call a function to get more information, or\nreturn a final answer.\n \nTo call a function, respond with a JSON object of the following format:\n\n{\n \"thought\": \"your thought about what to do\",\n \"action\": \"the action to take, should be one of [{{tool_names}}]\",\n \"arguments\": {\n \"argument1\": \"argument_value\",\n \"argument2\": \"argument_value\"\n }\n}\n\nTo provide a final answer, response a JSON object of the following format:\n\n{\n \"thought\": \"I now know the final answer\",\n \"final-answer\": \"the final answer to the original input question\"\n}\n\nPrevious steps are included in the input. Each step has the following\nformat in your output:\n\n{\n \"thought\": \"your thought about what to do\",\n \"action\": \"the action taken\",\n \"arguments\": {\n \"argument1\": action argument,\n \"argument2\": action argument2\n },\n \"observation\": \"the result of the action\",\n}\n\nRespond by describing either one single thought/action/arguments or\nthe final-answer. Pause after providing one action or final-answer.\n\n{% if context %}Additional context has been provided:\n{{context}}{% endif %}\n\nQuestion: {{question}}\n\nInput:\n \n{% for h in history %}\n{\n \"action\": \"{{h.action}}\",\n \"arguments\": [\n{% for k, v in h.arguments.items() %} {\n \"{{k}}\": \"{{v}}\",\n{%endfor%} }\n ],\n \"observation\": \"{{h.observation}}\"\n}\n{% endfor %}",
- "response-type": "json"
- }
- }
-
-}
-
diff --git a/templates/prompts/gemini.jsonnet b/templates/prompts/gemini.jsonnet
deleted file mode 100644
index b9a1e0c0..00000000
--- a/templates/prompts/gemini.jsonnet
+++ /dev/null
@@ -1,42 +0,0 @@
-// For VertexAI Gemini. Not currently overriding prompts
-
-local prompts = import "default-prompts.jsonnet";
-
-prompts + {
-
- // "system-template":: "PROMPT GOES HERE.",
-
- "templates" +:: {
-
- "question" +:: {
- // "prompt": "PROMPT GOES HERE",
- },
-
- "extract-definitions" +:: {
- // "prompt": "PROMPT GOES HERE",
- },
-
- "extract-relationships" +:: {
- // "prompt": "PROMPT GOES HERE",
- },
-
- "extract-topics" +:: {
- // "prompt": "PROMPT GOES HERE",
- },
-
- "extract-rows" +:: {
- // "prompt": "PROMPT GOES HERE",
- },
-
- "kg-prompt" +:: {
- // "prompt": "PROMPT GOES HERE",
- },
-
- "document-prompt" +:: {
- // "prompt": "PROMPT GOES HERE",
- },
-
- }
-
-}
-
diff --git a/templates/prompts/mixtral.jsonnet b/templates/prompts/mixtral.jsonnet
deleted file mode 100644
index cd56e7ef..00000000
--- a/templates/prompts/mixtral.jsonnet
+++ /dev/null
@@ -1,42 +0,0 @@
-// For Mixtral. Not currently overriding prompts
-
-local prompts = import "default-prompts.jsonnet";
-
-prompts + {
-
- // "system-template":: "PROMPT GOES HERE.",
-
- "templates" +:: {
-
- "question" +:: {
- // "prompt": "PROMPT GOES HERE",
- },
-
- "extract-definitions" +:: {
- // "prompt": "PROMPT GOES HERE",
- },
-
- "extract-relationships" +:: {
- // "prompt": "PROMPT GOES HERE",
- },
-
- "extract-topics" +:: {
- // "prompt": "PROMPT GOES HERE",
- },
-
- "extract-rows" +:: {
- // "prompt": "PROMPT GOES HERE",
- },
-
- "kg-prompt" +:: {
- // "prompt": "PROMPT GOES HERE",
- },
-
- "document-prompt" +:: {
- // "prompt": "PROMPT GOES HERE",
- },
-
- }
-
-}
-
diff --git a/templates/prompts/openai.jsonnet b/templates/prompts/openai.jsonnet
deleted file mode 100644
index 5d232337..00000000
--- a/templates/prompts/openai.jsonnet
+++ /dev/null
@@ -1,42 +0,0 @@
-// For OpenAI LLMs. Not currently overriding prompts
-
-local prompts = import "default-prompts.jsonnet";
-
-prompts + {
-
- // "system-template":: "PROMPT GOES HERE.",
-
- "templates" +:: {
-
- "question" +:: {
- // "prompt": "PROMPT GOES HERE",
- },
-
- "extract-definitions" +:: {
- // "prompt": "PROMPT GOES HERE",
- },
-
- "extract-relationships" +:: {
- // "prompt": "PROMPT GOES HERE",
- },
-
- "extract-topics" +:: {
- // "prompt": "PROMPT GOES HERE",
- },
-
- "extract-rows" +:: {
- // "prompt": "PROMPT GOES HERE",
- },
-
- "kg-prompt" +:: {
- // "prompt": "PROMPT GOES HERE",
- },
-
- "document-prompt" +:: {
- // "prompt": "PROMPT GOES HERE",
- },
-
- }
-
-}
-
diff --git a/templates/prompts/slm.jsonnet b/templates/prompts/slm.jsonnet
deleted file mode 100644
index 48eb96d0..00000000
--- a/templates/prompts/slm.jsonnet
+++ /dev/null
@@ -1,44 +0,0 @@
-// For SLM. Not currently overriding prompts
-
-local prompts = import "default-prompts.jsonnet";
-
-prompts + {
-
- // "system-template":: "PROMPT GOES HERE.",
-
- "templates" +:: {
-
- "question" +:: {
- // "prompt": "PROMPT GOES HERE",
- },
-
- "extract-definitions" +:: {
- // "prompt": "PROMPT GOES HERE",
- },
-
- "extract-relationships" +:: {
- // "prompt": "PROMPT GOES HERE",
- },
-
- "extract-topics" +:: {
- // "prompt": "PROMPT GOES HERE",
- },
-
- "extract-rows" +:: {
- // "prompt": "PROMPT GOES HERE",
- },
-
- "kg-prompt" +:: {
- // "prompt": "PROMPT GOES HERE",
- },
-
- "document-prompt" +:: {
- // "prompt": "PROMPT GOES HERE",
- },
-
- }
-
-}
-
-
-
diff --git a/templates/storage.jsonnet b/templates/storage.jsonnet
deleted file mode 100644
index 6ae5c0be..00000000
--- a/templates/storage.jsonnet
+++ /dev/null
@@ -1,10 +0,0 @@
-
-local cassandra = import "components/stores/cassandra.jsonnet";
-local pulsar = import "components/pulsar.jsonnet";
-local milvus = import "components/stores/milvus.jsonnet";
-local grafana = import "components/grafana.jsonnet";
-
-local config = cassandra + pulsar + milvus + grafana;
-
-std.manifestYamlDoc(config)
-
diff --git a/templates/stores/cassandra.jsonnet b/templates/stores/cassandra.jsonnet
deleted file mode 100644
index 2a9d6d7a..00000000
--- a/templates/stores/cassandra.jsonnet
+++ /dev/null
@@ -1,40 +0,0 @@
-local base = import "base/base.jsonnet";
-local images = import "values/images.jsonnet";
-
-{
-
- "cassandra" +: {
-
- create:: function(engine)
-
- local vol = engine.volume("cassandra").with_size("20G");
-
- local container =
- engine.container("cassandra")
- .with_image(images.cassandra)
- .with_environment({
- JVM_OPTS: "-Xms300M -Xmx300M -Dcassandra.skip_wait_for_gossip_to_settle=0",
- })
- .with_limits("1.0", "1000M")
- .with_reservations("0.5", "1000M")
- .with_port(9042, 9042, "cassandra")
- .with_volume_mount(vol, "/var/lib/cassandra");
-
- local containerSet = engine.containers(
- "cassandra", [ container ]
- );
-
- local service =
- engine.service(containerSet)
- .with_port(9042, 9042, "api");
-
- engine.resources([
- vol,
- containerSet,
- service,
- ])
-
- },
-
-}
-
diff --git a/templates/stores/falkordb.jsonnet b/templates/stores/falkordb.jsonnet
deleted file mode 100644
index 78509a43..00000000
--- a/templates/stores/falkordb.jsonnet
+++ /dev/null
@@ -1,39 +0,0 @@
-local base = import "base/base.jsonnet";
-local images = import "values/images.jsonnet";
-
-{
-
- "falkordb" +: {
-
- create:: function(engine)
-
- local vol = engine.volume("falkordb").with_size("20G");
-
- local container =
- engine.container("falkordb")
- .with_image(images.falkordb)
- .with_limits("1.0", "768M")
- .with_reservations("0.5", "768M")
- .with_port(6379, 6379, "api")
- .with_port(3010, 3000, "ui")
- .with_volume_mount(vol, "/data");
-
- local containerSet = engine.containers(
- "falkordb", [ container ]
- );
-
- local service =
- engine.service(containerSet)
- .with_port(6379, 6379, "api")
- .with_port(3010, 3010, "ui");
-
- engine.resources([
- vol,
- containerSet,
- service,
- ])
-
- },
-
-}
-
diff --git a/templates/stores/memgraph.jsonnet b/templates/stores/memgraph.jsonnet
deleted file mode 100644
index 70ad127a..00000000
--- a/templates/stores/memgraph.jsonnet
+++ /dev/null
@@ -1,71 +0,0 @@
-local base = import "base/base.jsonnet";
-local images = import "values/images.jsonnet";
-
-{
-
- "memgraph" +: {
-
- create:: function(engine)
-
- local vol = engine.volume("memgraph").with_size("20G");
-
- local container =
- engine.container("memgraph")
- .with_image(images.memgraph_mage)
- .with_environment({
- MEMGRAPH: "--storage-properties-on-edges=true --storage-enable-edges-metadata=true"
- })
- .with_limits("1.0", "1000M")
- .with_reservations("0.5", "1000M")
- .with_port(7474, 7474, "api")
- .with_port(7687, 7687, "api2")
- .with_volume_mount(vol, "/var/lib/memgraph");
-
- local containerSet = engine.containers(
- "memgraph", [ container ]
- );
-
- local service =
- engine.service(containerSet)
- .with_port(7474, 7474, "api")
- .with_port(7687, 7687, "api2");
-
- engine.resources([
- vol,
- containerSet,
- service,
- ])
-
- },
-
- "memgraph-lab" +: {
-
- create:: function(engine)
-
- local container =
- engine.container("lab")
- .with_image(images.memgraph_lab)
- .with_environment({
- QUICK_CONNECT_MG_HOST: "memgraph",
- QUICK_CONNECT_MG_PORT: "7687",
- })
- .with_limits("1.0", "512M")
- .with_reservations("0.5", "512M")
- .with_port(3010, 3000, "http");
-
- local containerSet = engine.containers(
- "lab", [ container ]
- );
-
- local service =
- engine.service(containerSet)
- .with_port(3010, 3010, "http");
-
- engine.resources([
- containerSet,
- service,
- ])
-
- },
-
-}
diff --git a/templates/stores/milvus.jsonnet b/templates/stores/milvus.jsonnet
deleted file mode 100644
index 1c3e3734..00000000
--- a/templates/stores/milvus.jsonnet
+++ /dev/null
@@ -1,90 +0,0 @@
-local base = import "base/base.jsonnet";
-local images = import "values/images.jsonnet";
-local minio = import "stores/minio.jsonnet";
-
-minio {
-
- etcd +: {
-
- create:: function(engine)
-
- local vol = engine.volume("etcd").with_size("20G");
-
- local container =
- engine.container("etcd")
- .with_image(images.etcd)
- .with_command([
- "etcd",
- "-advertise-client-urls=http://127.0.0.1:2379",
- "-listen-client-urls",
- "http://0.0.0.0:2379",
- "--data-dir",
- "/etcd",
- ])
- .with_environment({
- ETCD_AUTO_COMPACTION_MODE: "revision",
- ETCD_AUTO_COMPACTION_RETENTION: "1000",
- ETCD_QUOTA_BACKEND_BYTES: "4294967296",
- ETCD_SNAPSHOT_COUNT: "50000"
- })
- .with_limits("1.0", "128M")
- .with_reservations("0.25", "128M")
- .with_port(2379, 2379, "api")
- .with_volume_mount(vol, "/etcd");
-
- local containerSet = engine.containers(
- "etcd", [ container ]
- );
-
- local service =
- engine.service(containerSet)
- .with_port(2379, 2379, "api");
-
- engine.resources([
- vol,
- containerSet,
- service,
- ])
-
- },
-
- milvus +: {
-
- create:: function(engine)
-
- local vol = engine.volume("milvus").with_size("20G");
-
- local container =
- engine.container("milvus")
- .with_image(images.milvus)
- .with_command([
- "milvus", "run", "standalone"
- ])
- .with_environment({
- ETCD_ENDPOINTS: "etcd:2379",
- MINIO_ADDRESS: "minio:9000",
- })
- .with_limits("1.0", "256M")
- .with_reservations("0.5", "256M")
- .with_port(9091, 9091, "api")
- .with_port(19530, 19530, "api2")
- .with_volume_mount(vol, "/var/lib/milvus");
-
- local containerSet = engine.containers(
- "milvus", [ container ]
- );
-
- local service =
- engine.service(containerSet)
- .with_port(9091, 9091, "api")
- .with_port(19530, 19530, "api2");
-
- engine.resources([
- vol,
- containerSet,
- service,
- ])
-
- },
-
-}
diff --git a/templates/stores/minio.jsonnet b/templates/stores/minio.jsonnet
deleted file mode 100644
index 6ef1d96f..00000000
--- a/templates/stores/minio.jsonnet
+++ /dev/null
@@ -1,49 +0,0 @@
-local base = import "base/base.jsonnet";
-local images = import "values/images.jsonnet";
-
-{
-
- minio +: {
-
- create:: function(engine)
-
- local vol = engine.volume("minio-data").with_size("20G");
-
- local container =
- engine.container("minio")
- .with_image(images.minio)
- .with_command([
- "minio",
- "server",
- "/minio_data",
- "--console-address",
- ":9001",
- ])
- .with_environment({
- MINIO_ROOT_USER: "minioadmin",
- MINIO_ROOT_PASSWORD: "minioadmin",
- })
- .with_limits("0.5", "128M")
- .with_reservations("0.25", "128M")
- .with_port(9000, 9000, "api")
- .with_port(9001, 9001, "console")
- .with_volume_mount(vol, "/minio_data");
-
- local containerSet = engine.containers(
- "etcd", [ container ]
- );
-
- local service =
- engine.service(containerSet)
- .with_port(9000, 9000, "api")
- .with_port(9001, 9001, "console");
-
- engine.resources([
- vol,
- containerSet,
- service,
- ])
-
- },
-
-}
diff --git a/templates/stores/neo4j.jsonnet b/templates/stores/neo4j.jsonnet
deleted file mode 100644
index 3a8bb783..00000000
--- a/templates/stores/neo4j.jsonnet
+++ /dev/null
@@ -1,47 +0,0 @@
-local base = import "base/base.jsonnet";
-local images = import "values/images.jsonnet";
-
-{
-
- "neo4j" +: {
-
- create:: function(engine)
-
- local vol = engine.volume("neo4j").with_size("20G");
-
- local container =
- engine.container("neo4j")
- .with_image(images.neo4j)
- .with_environment({
- NEO4J_AUTH: "neo4j/password",
- NEO4J_server_memory_pagecache_size: "512m",
- NEO4J_server_memory_heap_max__size: "512m",
- // NEO4J_server_bolt_listen__address: "0.0.0.0:7687",
- // NEO4J_server_default__listen__address: "0.0.0.0",
- // NEO4J_server_http_listen__address: "0.0.0.0:7474",
- })
- .with_limits("1.0", "1536M")
- .with_reservations("0.5", "1536M")
- .with_port(7474, 7474, "api")
- .with_port(7687, 7687, "api2")
- .with_volume_mount(vol, "/data");
-
- local containerSet = engine.containers(
- "neo4j", [ container ]
- );
-
- local service =
- engine.service(containerSet)
- .with_port(7474, 7474, "api")
- .with_port(7687, 7687, "api2");
-
- engine.resources([
- vol,
- containerSet,
- service,
- ])
-
- },
-
-}
-
diff --git a/templates/stores/qdrant.jsonnet b/templates/stores/qdrant.jsonnet
deleted file mode 100644
index 9e807632..00000000
--- a/templates/stores/qdrant.jsonnet
+++ /dev/null
@@ -1,39 +0,0 @@
-local base = import "base/base.jsonnet";
-local images = import "values/images.jsonnet";
-
-{
-
- "qdrant" +: {
-
- create:: function(engine)
-
- local vol = engine.volume("qdrant").with_size("20G");
-
- local container =
- engine.container("qdrant")
- .with_image(images.qdrant)
- .with_limits("1.0", "1024M")
- .with_reservations("0.5", "1024M")
- .with_port(6333, 6333, "api")
- .with_port(6334, 6334, "api2")
- .with_volume_mount(vol, "/qdrant/storage");
-
- local containerSet = engine.containers(
- "qdrant", [ container ]
- );
-
- local service =
- engine.service(containerSet)
- .with_port(6333, 6333, "api")
- .with_port(6334, 6334, "api2");
-
- engine.resources([
- vol,
- containerSet,
- service,
- ])
-
- },
-
-}
-
diff --git a/templates/util/decode-config.jsonnet b/templates/util/decode-config.jsonnet
deleted file mode 100644
index 503b5b6b..00000000
--- a/templates/util/decode-config.jsonnet
+++ /dev/null
@@ -1,31 +0,0 @@
-
-local components = import "components.jsonnet";
-
-local apply = function(p, components)
-
- local base = {
-
- with:: function(k, v) self + {
- [k]:: v
- },
-
- with_params:: function(pars)
- self + std.foldl(
- function(obj, par) obj.with(par.key, par.value),
- std.objectKeysValues(pars),
- self
- ),
-
- };
-
- local component = base + components[p.name];
-
- component.with_params(p.parameters);
-
-local decode = function(config)
- local add = function(state, c) state + apply(c, components);
- local patterns = std.foldl(add, config, {});
- patterns;
-
-decode
-
diff --git a/templates/values/images.jsonnet b/templates/values/images.jsonnet
deleted file mode 100644
index 54dbd016..00000000
--- a/templates/values/images.jsonnet
+++ /dev/null
@@ -1,23 +0,0 @@
-local version = import "version.jsonnet";
-{
- cassandra: "docker.io/cassandra:4.1.6",
- neo4j: "docker.io/neo4j:5.26.0-community-bullseye",
- pulsar: "docker.io/apachepulsar/pulsar:3.3.1",
- pulsar_manager: "docker.io/apachepulsar/pulsar-manager:v0.4.0",
- etcd: "quay.io/coreos/etcd:v3.5.15",
- minio: "docker.io/minio/minio:RELEASE.2025-02-03T21-03-04Z",
- milvus: "docker.io/milvusdb/milvus:v2.4.9",
- prometheus: "docker.io/prom/prometheus:v2.53.2",
- grafana: "docker.io/grafana/grafana:11.1.4",
- trustgraph_base: "docker.io/trustgraph/trustgraph-base:" + version,
- trustgraph_flow: "docker.io/trustgraph/trustgraph-flow:" + version,
- trustgraph_ocr: "docker.io/trustgraph/trustgraph-ocr:" + version,
- trustgraph_bedrock: "docker.io/trustgraph/trustgraph-bedrock:" + version,
- trustgraph_vertexai: "docker.io/trustgraph/trustgraph-vertexai:" + version,
- trustgraph_hf: "docker.io/trustgraph/trustgraph-hf:" + version,
- qdrant: "docker.io/qdrant/qdrant:v1.13.3",
- memgraph_mage: "docker.io/memgraph/memgraph-mage:1.22-memgraph-2.22",
- memgraph_lab: "docker.io/memgraph/lab:2.19.1",
- falkordb: "docker.io/falkordb/falkordb:latest",
- "workbench-ui": "docker.io/trustgraph/workbench-ui:0.2.4",
-}
diff --git a/templates/values/url.jsonnet b/templates/values/url.jsonnet
deleted file mode 100644
index 1bacb067..00000000
--- a/templates/values/url.jsonnet
+++ /dev/null
@@ -1,6 +0,0 @@
-{
- pulsar: "pulsar://pulsar:6650",
- pulsar_admin: "http://pulsar:8080",
- milvus: "http://milvus:19530",
- qdrant: "http://qdrant:6333",
-}
diff --git a/templates/zip-readme.md b/templates/zip-readme.md
deleted file mode 100644
index 0b117792..00000000
--- a/templates/zip-readme.md
+++ /dev/null
@@ -1,28 +0,0 @@
-
-Note! this is a subset of possible configurations, to generate your own
-launch config use the config util...
-
-- Production: https://config-ui.demo.trustgraph.ai
-- Early release: https://dev.config-ui.demo.trustgraph.ai
-
-The config util auto-generates deployment instructions for your
-configuration, so that's the recommended way to deploy.
-
-----------------------------------------------------------------------------
-
-These are launch configurations for TrustGraph. See https://trustgraph.ai for
-the quickstart using docker compose.
-
-Hint for Linux: There are files here which get mounted as volumes inside
-Docker Compose containers. This may trigger SELinux rules on your system, to
-permit access insider the containers, use a command like this...
-
-chcon -Rt svirt_sandbox_file_t grafana/ prometheus/
-
-The file vertexai/private.json is a placeholder for real GCP credentials if
-you are using the VertexAI LLM. If you're using that in Docker Compose,
-replace with your real credentials, and don't forget to permit access if you
-are using Linux:
-
-chcon -Rt svirt_sandbox_file_t vertexai/
-
diff --git a/test-api/test-agent-api b/test-api/test-agent-api
index f36ba196..bba0e70d 100755
--- a/test-api/test-agent-api
+++ b/test-api/test-agent-api
@@ -4,7 +4,7 @@ import requests
import json
import sys
-url = "http://localhost:8088/api/v1/"
+url = "http://localhost:8088/api/v1/flow/0000/agent"
############################################################################
@@ -13,10 +13,11 @@ input = {
}
resp = requests.post(
- f"{url}agent",
+ url,
json=input,
)
+print(resp.text)
resp = resp.json()
if "error" in resp:
@@ -25,4 +26,3 @@ if "error" in resp:
print(resp["answer"])
-
diff --git a/test-api/test-config-api b/test-api/test-config-api
new file mode 100755
index 00000000..4bb15334
--- /dev/null
+++ b/test-api/test-config-api
@@ -0,0 +1,79 @@
+#!/usr/bin/env python3
+
+"""Exercise the config REST API end to end.
+
+Sends a fixed sequence of config operations (config, put, get, list,
+getvalues, delete) and pretty-prints each reply.
+"""
+
+import requests
+import json
+import sys
+
+url = "http://localhost:8088/api/v1/"
+
+############################################################################
+
+def call(request):
+    """POST one request to the config endpoint and pretty-print the reply.
+
+    Raises RuntimeError on a non-200 HTTP status; exits with status 1 if
+    the JSON reply contains an "error" field.
+    """
+
+    resp = requests.post(f"{url}config", json=request)
+
+    if resp.status_code != 200:
+        raise RuntimeError(f"Status code: {resp.status_code}")
+
+    obj = resp.json()
+
+    if "error" in obj:
+        print(f"Error: {obj['error']}")
+        sys.exit(1)
+
+    print(json.dumps(obj, indent=4))
+
+############################################################################
+
+# Sent in order; the three "config" requests dump the state before the
+# puts, after the puts and after the deletes.
+steps = [
+    {"operation": "config"},
+    {
+        "operation": "put",
+        "values": [
+            {"type": "test", "key": "key1", "value": "value1"},
+            {"type": "test", "key": "key2", "value": "value2"},
+        ],
+    },
+    {
+        "operation": "put",
+        "values": [
+            {"type": "test", "key": "key3", "value": "testing 1 2 3"},
+        ],
+    },
+    {
+        "operation": "get",
+        "keys": [
+            {"type": "test", "key": "key2"},
+            {"type": "test", "key": "key3"},
+        ],
+    },
+    {"operation": "config"},
+    {"operation": "list", "type": "test"},
+    {"operation": "getvalues", "type": "test"},
+    {
+        "operation": "delete",
+        "keys": [
+            {"type": "test", "key": "key1"},
+            {"type": "test", "key": "key3"},
+        ],
+    },
+    {"operation": "config"},
+]
+
+for step in steps:
+    call(step)
+
+############################################################################
diff --git a/test-api/test-embeddings-api b/test-api/test-embeddings-api
index b1defd01..dd15af31 100755
--- a/test-api/test-embeddings-api
+++ b/test-api/test-embeddings-api
@@ -4,7 +4,7 @@ import requests
import json
import sys
-url = "http://localhost:8088/api/v1/"
+url = "http://localhost:8088/api/v1/flow/0000/embeddings"
############################################################################
@@ -13,7 +13,7 @@ input = {
}
resp = requests.post(
- f"{url}embeddings",
+ url,
json=input,
)
@@ -25,4 +25,3 @@ if "error" in resp:
print(resp["vectors"])
-
diff --git a/test-api/test-graph-rag-api b/test-api/test-graph-rag-api
index c329934c..886d0c15 100755
--- a/test-api/test-graph-rag-api
+++ b/test-api/test-graph-rag-api
@@ -4,7 +4,7 @@ import requests
import json
import sys
-url = "http://localhost:8088/api/v1/"
+url = "http://localhost:8088/api/v1/flow/0000/graph-rag"
############################################################################
@@ -13,7 +13,7 @@ input = {
}
resp = requests.post(
- f"{url}graph-rag",
+ url,
json=input,
)
diff --git a/test-api/test-knowledge-delete b/test-api/test-knowledge-delete
new file mode 100755
index 00000000..d800b347
--- /dev/null
+++ b/test-api/test-knowledge-delete
@@ -0,0 +1,35 @@
+#!/usr/bin/env python3
+
+"""Ad-hoc check: POST a delete-kg-core request to the knowledge endpoint."""
+
+import requests
+import json
+import sys
+import base64
+import time
+
+url = "http://localhost:8088/api/v1/"
+
+############################################################################
+
+request = {
+    "operation": "delete-kg-core",
+    "id": "https://trustgraph.ai/doc/intelligence-and-state",
+    "user": "trustgraph",
+}
+
+resp = requests.post(f"{url}knowledge", json=request)
+
+# Print the raw body first so a non-JSON reply is still visible.
+print(resp.text)
+result = resp.json()
+
+if "error" in result:
+    print(f"Error: {result['error']}")
+    sys.exit(1)
+
+print(result)
+
+sys.exit(0)
+
+############################################################################
diff --git a/test-api/test-knowledge-fetch b/test-api/test-knowledge-fetch
new file mode 100755
index 00000000..c327a9d8
--- /dev/null
+++ b/test-api/test-knowledge-fetch
@@ -0,0 +1,35 @@
+#!/usr/bin/env python3
+
+"""Ad-hoc check: POST a fetch-kg-core request to the knowledge endpoint."""
+
+import requests
+import json
+import sys
+import base64
+import time
+
+url = "http://localhost:8088/api/v1/"
+
+############################################################################
+
+request = {
+    "operation": "fetch-kg-core",
+    "id": "https://trustgraph.ai/doc/intelligence-and-state",
+    "user": "trustgraph",
+}
+
+resp = requests.post(f"{url}knowledge", json=request)
+
+# Print the raw body first so a non-JSON reply is still visible.
+print(resp.text)
+result = resp.json()
+
+if "error" in result:
+    print(f"Error: {result['error']}")
+    sys.exit(1)
+
+print(result)
+
+sys.exit(0)
+
+############################################################################
diff --git a/test-api/test-knowledge-fetch2 b/test-api/test-knowledge-fetch2
new file mode 100755
index 00000000..2a611547
--- /dev/null
+++ b/test-api/test-knowledge-fetch2
@@ -0,0 +1,58 @@
+#!/usr/bin/env python3
+
+"""Ad-hoc check: fetch a knowledge core over the websocket API."""
+
+import requests
+import asyncio
+import json
+import sys
+import base64
+import time
+from websockets.asyncio.client import connect
+
+url = "ws://localhost:8088/api/v1/socket"
+
+############################################################################
+
+async def run():
+    """Send one fetch-kg-core request and print replies until end of stream.
+
+    Returns True if the stream ended with a truthy "eos" marker, False if
+    the server reported an error.
+    """
+
+    req = {
+        "id": "aa11",
+        "service": "knowledge",
+        "request": {
+            "operation": "fetch-kg-core",
+            "user": "trustgraph",
+            "id": "https://trustgraph.ai/doc/intelligence-and-state",
+        },
+    }
+
+    async with connect(url) as ws:
+
+        await ws.send(json.dumps(req))
+
+        while True:
+
+            obj = json.loads(await ws.recv())
+            print(obj)
+
+            if "error" in obj:
+                print(f"Error: {obj['error']}")
+                return False
+
+            # Messages without a "response" payload cannot end the stream.
+            if "response" not in obj:
+                continue
+
+            response = obj["response"]
+            if "eos" in response and response["eos"]:
+                return True
+
+############################################################################
+
+# Exit non-zero on a server-reported error, like the sibling REST scripts.
+sys.exit(0 if asyncio.run(run()) else 1)
diff --git a/test-api/test-knowledge-list b/test-api/test-knowledge-list
new file mode 100755
index 00000000..b616c719
--- /dev/null
+++ b/test-api/test-knowledge-list
@@ -0,0 +1,34 @@
+#!/usr/bin/env python3
+
+"""Ad-hoc check: POST a list-kg-cores request to the knowledge endpoint."""
+
+import requests
+import json
+import sys
+import base64
+import time
+
+url = "http://localhost:8088/api/v1/"
+
+############################################################################
+
+request = {
+    "operation": "list-kg-cores",
+    "user": "trustgraph",
+}
+
+resp = requests.post(f"{url}knowledge", json=request)
+
+# Print the raw body first so a non-JSON reply is still visible.
+print(resp.text)
+result = resp.json()
+
+if "error" in result:
+    print(f"Error: {result['error']}")
+    sys.exit(1)
+
+print(result)
+
+sys.exit(0)
+
+############################################################################
diff --git a/test-api/test-library-add-doc b/test-api/test-library-add-doc
new file mode 100755
index 00000000..d0fcb0d2
--- /dev/null
+++ b/test-api/test-library-add-doc
@@ -0,0 +1,64 @@
+#!/usr/bin/env python3
+
+"""Ad-hoc check: add a plain-text document through the librarian endpoint."""
+
+import requests
+import json
+import sys
+import base64
+import time
+
+url = "http://localhost:8088/api/v1/"
+
+############################################################################
+
+doc_id = "http://trustgraph.ai/doc/9fdee98b-b259-40ac-bcb9-8e82ccedeb04"
+
+# The file content is sent base64-encoded in the "content" field.
+with open("docs/README.cats", "rb") as f:
+    doc = base64.b64encode(f.read()).decode("utf-8")
+
+request = {
+    "operation": "add-document",
+    "document-metadata": {
+        "id": doc_id,
+        "time": int(time.time()),
+        "kind": "text/plain",
+        "title": "Mark's cats",
+        "comments": "Test doc taken from the TrustGraph repo",
+        "metadata": [
+            {
+                "s": {"v": doc_id, "e": True},
+                "p": {
+                    "v": "http://www.w3.org/2000/01/rdf-schema#label",
+                    "e": True,
+                },
+                "o": {"v": "Mark's pets", "e": False},
+            },
+            {
+                "s": {"v": doc_id, "e": True},
+                "p": {"v": "https://schema.org/keywords", "e": True},
+                "o": {"v": "cats", "e": False},
+            },
+        ],
+        "user": "trustgraph",
+        "tags": ["mark", "cats"],
+    },
+    "content": doc,
+}
+
+resp = requests.post(f"{url}librarian", json=request)
+
+# Print the raw body first so a non-JSON reply is still visible.
+print(resp.text)
+result = resp.json()
+
+if "error" in result:
+    print(f"Error: {result['error']}")
+    sys.exit(1)
+
+print(result)
+
+sys.exit(0)
+
+############################################################################
diff --git a/test-api/test-library-add-doc2 b/test-api/test-library-add-doc2
new file mode 100755
index 00000000..f886c739
--- /dev/null
+++ b/test-api/test-library-add-doc2
@@ -0,0 +1,72 @@
+#!/usr/bin/env python3
+
+"""Ad-hoc check: add a PDF document through the librarian endpoint."""
+
+import requests
+import json
+import sys
+import base64
+import time
+
+url = "http://localhost:8088/api/v1/"
+
+############################################################################
+
+doc_id = "http://trustgraph.ai/doc/6d034da9-2759-45c2-af24-14db7f4c44c2"
+
+source = "../sources/20160001634.pdf"
+
+# The file content is sent base64-encoded in the "content" field.
+with open(source, "rb") as f:
+    doc = base64.b64encode(f.read()).decode("utf-8")
+
+request = {
+    "operation": "add-document",
+    "document-metadata": {
+        "id": doc_id,
+        "time": int(time.time()),
+        "kind": "application/pdf",
+        "title": "Application of SAE ARP4754A to Flight Critical Systems",
+        "comments": "Application of federal safety standards to NASA spacecraft",
+        "metadata": [
+            {
+                "s": {"v": doc_id, "e": True},
+                "p": {
+                    "v": "http://www.w3.org/2000/01/rdf-schema#label",
+                    "e": True,
+                },
+                # NOTE(review): this label does not match the title above.
+                "o": {"v": "Challenger report volume 1", "e": False},
+            },
+            {
+                "s": {"v": doc_id, "e": True},
+                "p": {"v": "https://schema.org/keywords", "e": True},
+                "o": {"v": "space shuttle", "e": False},
+            },
+            {
+                "s": {"v": doc_id, "e": True},
+                "p": {"v": "https://schema.org/keywords", "e": True},
+                "o": {"v": "nasa", "e": False},
+            },
+        ],
+        "user": "trustgraph",
+        "tags": ["nasa", "safety-engineering"],
+    },
+    "content": doc,
+}
+
+resp = requests.post(f"{url}librarian", json=request)
+
+# Print the raw body first so a non-JSON reply is still visible.
+print(resp.text)
+result = resp.json()
+
+if "error" in result:
+    print(f"Error: {result['error']}")
+    sys.exit(1)
+
+print(result)
+
+sys.exit(0)
+
+############################################################################
diff --git a/test-api/test-library-add-processing b/test-api/test-library-add-processing
new file mode 100755
index 00000000..f1692b0a
--- /dev/null
+++ b/test-api/test-library-add-processing
@@ -0,0 +1,46 @@
+#!/usr/bin/env python3
+
+"""Ad-hoc check: POST an add-processing request to the librarian endpoint."""
+
+import requests
+import json
+import sys
+import base64
+import time
+
+url = "http://localhost:8088/api/v1/"
+
+############################################################################
+
+doc_id = "http://trustgraph.ai/doc/9fdee98b-b259-40ac-bcb9-8e82ccedeb04"
+
+proc_id = "2714fc72-44ab-45f2-94dd-6773fc336535"
+
+request = {
+    "operation": "add-processing",
+    "processing-metadata": {
+        "id": proc_id,
+        "document-id": doc_id,
+        "time": int(time.time()),
+        "flow": "0000",
+        "user": "trustgraph",
+        "collection": "default",
+        "tags": ["test"],
+    },
+}
+
+resp = requests.post(f"{url}librarian", json=request)
+
+# Print the raw body first so a non-JSON reply is still visible.
+print(resp.text)
+result = resp.json()
+
+if "error" in result:
+    print(f"Error: {result['error']}")
+    sys.exit(1)
+
+print(result)
+
+sys.exit(0)
+
+############################################################################
diff --git a/test-api/test-library-add-processing2 b/test-api/test-library-add-processing2
new file mode 100755
index 00000000..613da630
--- /dev/null
+++ b/test-api/test-library-add-processing2
@@ -0,0 +1,46 @@
+#!/usr/bin/env python3
+
+"""Ad-hoc check: POST an add-processing request to the librarian endpoint."""
+
+import requests
+import json
+import sys
+import base64
+import time
+
+url = "http://localhost:8088/api/v1/"
+
+############################################################################
+
+doc_id = "http://trustgraph.ai/doc/6d034da9-2759-45c2-af24-14db7f4c44c2"
+
+proc_id = "72be9c56-a63a-4dde-8f3c-9b35f2598b83"
+
+request = {
+    "operation": "add-processing",
+    "processing-metadata": {
+        "id": proc_id,
+        "document-id": doc_id,
+        "time": int(time.time()),
+        "flow": "0000",
+        "user": "trustgraph",
+        "collection": "default",
+        "tags": ["test"],
+    },
+}
+
+resp = requests.post(f"{url}librarian", json=request)
+
+# Print the raw body first so a non-JSON reply is still visible.
+print(resp.text)
+result = resp.json()
+
+if "error" in result:
+    print(f"Error: {result['error']}")
+    sys.exit(1)
+
+print(result)
+
+sys.exit(0)
+
+############################################################################
diff --git a/test-api/test-library-get-document-content b/test-api/test-library-get-document-content
new file mode 100755
index 00000000..5a8b2880
--- /dev/null
+++ b/test-api/test-library-get-document-content
@@ -0,0 +1,37 @@
+#!/usr/bin/env python3
+
+"""Ad-hoc check: fetch a document's content from the librarian endpoint."""
+
+import requests
+import json
+import sys
+import base64
+
+url = "http://localhost:8088/api/v1/"
+
+############################################################################
+
+doc_id = "http://trustgraph.ai/doc/9fdee98b-b259-40ac-bcb9-8e82ccedeb04"
+
+user = "trustgraph"
+
+request = {
+    "operation": "get-document-content",
+    "user": user,
+    "document-id": doc_id,
+}
+
+resp = requests.post(f"{url}librarian", json=request)
+result = resp.json()
+
+if "error" in result:
+    print(f"Error: {result['error']}")
+    sys.exit(1)
+
+# The "content" field is base64; decode it and print it as UTF-8 text.
+content = base64.b64decode(result["content"]).decode("utf-8")
+print(content)
+
+sys.exit(0)
+
+############################################################################
diff --git a/test-api/test-library-get-document-metadata b/test-api/test-library-get-document-metadata
new file mode 100755
index 00000000..0bcdd321
--- /dev/null
+++ b/test-api/test-library-get-document-metadata
@@ -0,0 +1,38 @@
+#!/usr/bin/env python3
+
+"""Ad-hoc check: fetch a document's metadata from the librarian endpoint."""
+
+import requests
+import json
+import sys
+import base64
+
+url = "http://localhost:8088/api/v1/"
+
+############################################################################
+
+doc_id = "http://trustgraph.ai/doc/9fdee98b-b259-40ac-bcb9-8e82ccedeb04"
+
+user = "trustgraph"
+
+request = {
+    "operation": "get-document-metadata",
+    "user": user,
+    "document-id": doc_id,
+}
+
+resp = requests.post(f"{url}librarian", json=request)
+
+# Print the raw body first so a non-JSON reply is still visible.
+print(resp.text)
+result = resp.json()
+
+if "error" in result:
+    print(f"Error: {result['error']}")
+    sys.exit(1)
+
+print(result)
+
+sys.exit(0)
+
+############################################################################
diff --git a/test-api/test-library-list b/test-api/test-library-list
new file mode 100755
index 00000000..cecb835f
--- /dev/null
+++ b/test-api/test-library-list
@@ -0,0 +1,35 @@
+#!/usr/bin/env python3
+
+"""Ad-hoc check: POST a list-documents request to the librarian endpoint."""
+
+import requests
+import json
+import sys
+import base64
+
+url = "http://localhost:8088/api/v1/"
+
+############################################################################
+
+user = "trustgraph"
+
+request = {
+    "operation": "list-documents",
+    "user": user,
+}
+
+resp = requests.post(f"{url}librarian", json=request)
+
+# Print the raw body first so a non-JSON reply is still visible.
+print(resp.text)
+result = resp.json()
+
+if "error" in result:
+    print(f"Error: {result['error']}")
+    sys.exit(1)
+
+print(result)
+
+sys.exit(0)
+
+############################################################################
diff --git a/test-api/test-library-list-documents b/test-api/test-library-list-documents
new file mode 100755
index 00000000..9677ce4d
--- /dev/null
+++ b/test-api/test-library-list-documents
@@ -0,0 +1,34 @@
+#!/usr/bin/env python3
+
+"""Ad-hoc check: POST a list-documents request to the librarian endpoint."""
+
+import requests
+import json
+import sys
+import base64
+import time
+
+url = "http://localhost:8088/api/v1/"
+
+############################################################################
+
+request = {
+    "operation": "list-documents",
+    "user": "trustgraph",
+}
+
+resp = requests.post(f"{url}librarian", json=request)
+
+# Print the raw body first so a non-JSON reply is still visible.
+print(resp.text)
+result = resp.json()
+
+if "error" in result:
+    print(f"Error: {result['error']}")
+    sys.exit(1)
+
+print(result)
+
+sys.exit(0)
+
+############################################################################
diff --git a/test-api/test-library-list-processing b/test-api/test-library-list-processing
new file mode 100755
index 00000000..1d31a572
--- /dev/null
+++ b/test-api/test-library-list-processing
@@ -0,0 +1,34 @@
+#!/usr/bin/env python3
+
+"""Ad-hoc check: POST a list-processing request to the librarian endpoint."""
+
+import requests
+import json
+import sys
+import base64
+import time
+
+url = "http://localhost:8088/api/v1/"
+
+############################################################################
+
+request = {
+    "operation": "list-processing",
+    "user": "trustgraph",
+}
+
+resp = requests.post(f"{url}librarian", json=request)
+
+# Print the raw body first so a non-JSON reply is still visible.
+print(resp.text)
+result = resp.json()
+
+if "error" in result:
+    print(f"Error: {result['error']}")
+    sys.exit(1)
+
+print(result)
+
+sys.exit(0)
+
+############################################################################
diff --git a/test-api/test-library-remove-document b/test-api/test-library-remove-document
new file mode 100755
index 00000000..6354c292
--- /dev/null
+++ b/test-api/test-library-remove-document
@@ -0,0 +1,37 @@
+#!/usr/bin/env python3
+
+"""Ad-hoc check: POST a remove-document request to the librarian endpoint."""
+
+import requests
+import json
+import sys
+import base64
+import time
+
+url = "http://localhost:8088/api/v1/"
+
+############################################################################
+
+doc_id = "http://trustgraph.ai/doc/9fdee98b-b259-40ac-bcb9-8e82ccedeb04"
+
+request = {
+    "operation": "remove-document",
+    "user": "trustgraph",
+    "document-id": doc_id,
+}
+
+resp = requests.post(f"{url}librarian", json=request)
+
+# Print the raw body first so a non-JSON reply is still visible.
+print(resp.text)
+result = resp.json()
+
+if "error" in result:
+    print(f"Error: {result['error']}")
+    sys.exit(1)
+
+print(result)
+
+sys.exit(0)
+
+############################################################################
diff --git a/test-api/test-library-remove-document2 b/test-api/test-library-remove-document2
new file mode 100755
index 00000000..fd57d025
--- /dev/null
+++ b/test-api/test-library-remove-document2
@@ -0,0 +1,37 @@
+#!/usr/bin/env python3
+
+"""Ad-hoc check: POST a remove-document request to the librarian endpoint."""
+
+import requests
+import json
+import sys
+import base64
+import time
+
+url = "http://localhost:8088/api/v1/"
+
+############################################################################
+
+doc_id = "http://trustgraph.ai/doc/6d034da9-2759-45c2-af24-14db7f4c44c2"
+
+request = {
+    "operation": "remove-document",
+    "user": "trustgraph",
+    "document-id": doc_id,
+}
+
+resp = requests.post(f"{url}librarian", json=request)
+
+# Print the raw body first so a non-JSON reply is still visible.
+print(resp.text)
+result = resp.json()
+
+if "error" in result:
+    print(f"Error: {result['error']}")
+    sys.exit(1)
+
+print(result)
+
+sys.exit(0)
+
+############################################################################
diff --git a/test-api/test-library-remove-processing b/test-api/test-library-remove-processing
new file mode 100755
index 00000000..51bbcc42
--- /dev/null
+++ b/test-api/test-library-remove-processing
@@ -0,0 +1,37 @@
+#!/usr/bin/env python3
+
+"""Ad-hoc check: POST a remove-processing request to the librarian endpoint."""
+
+import requests
+import json
+import sys
+import base64
+import time
+
+url = "http://localhost:8088/api/v1/"
+
+############################################################################
+
+proc_id = "2714fc72-44ab-45f2-94dd-6773fc336535"
+
+request = {
+    "operation": "remove-processing",
+    "user": "trustgraph",
+    "processing-id": proc_id,
+}
+
+resp = requests.post(f"{url}librarian", json=request)
+
+# Print the raw body first so a non-JSON reply is still visible.
+print(resp.text)
+result = resp.json()
+
+if "error" in result:
+    print(f"Error: {result['error']}")
+    sys.exit(1)
+
+print(result)
+
+sys.exit(0)
+
+############################################################################
diff --git a/test-api/test-library-update-doc b/test-api/test-library-update-doc
new file mode 100755
index 00000000..eee4170d
--- /dev/null
+++ b/test-api/test-library-update-doc
@@ -0,0 +1,58 @@
+#!/usr/bin/env python3
+
+"""Ad-hoc check: POST an update-document request to the librarian endpoint."""
+
+import requests
+import json
+import sys
+import base64
+import time
+
+url = "http://localhost:8088/api/v1/"
+
+############################################################################
+
+doc_id = "http://trustgraph.ai/doc/9fdee98b-b259-40ac-bcb9-8e82ccedeb04"
+
+request = {
+    "operation": "update-document",
+    "document-metadata": {
+        "id": doc_id,
+        "time": int(time.time()),
+        "title": "Mark's cats - a story",
+        "comments": "Information about Mark's cats",
+        "metadata": [
+            {
+                "s": {"v": doc_id, "e": True},
+                "p": {
+                    "v": "http://www.w3.org/2000/01/rdf-schema#label",
+                    "e": True,
+                },
+                "o": {"v": "Mark's pets", "e": False},
+            },
+            {
+                "s": {"v": doc_id, "e": True},
+                "p": {"v": "https://schema.org/keywords", "e": True},
+                "o": {"v": "cats", "e": False},
+            },
+        ],
+        "user": "trustgraph",
+        "tags": ["mark", "cats", "pets"],
+    },
+}
+
+resp = requests.post(f"{url}librarian", json=request)
+
+# Print the raw body first so a non-JSON reply is still visible.
+print(resp.text)
+result = resp.json()
+
+if "error" in result:
+    print(f"Error: {result['error']}")
+    sys.exit(1)
+
+print(result)
+
+sys.exit(0)
+
+############################################################################
diff --git a/test-api/test-llm-api b/test-api/test-llm-api
index 6bee2048..fa100b15 100755
--- a/test-api/test-llm-api
+++ b/test-api/test-llm-api
@@ -4,7 +4,7 @@ import requests
import json
import sys
-url = "http://localhost:8088/api/v1/"
+url = "http://localhost:8088/api/v1/flow/0000/text-completion"
############################################################################
@@ -15,7 +15,7 @@ input = {
}
resp = requests.post(
- f"{url}text-completion",
+ url,
json=input,
)
diff --git a/test-api/test-llm2-api b/test-api/test-llm2-api
new file mode 100755
index 00000000..93d9d348
--- /dev/null
+++ b/test-api/test-llm2-api
@@ -0,0 +1,31 @@
+#!/usr/bin/env python3
+
+"""Ad-hoc check: POST a prompt to the text-completion endpoint."""
+
+import requests
+import json
+import sys
+
+url = "http://localhost:8088/api/v1/"
+
+############################################################################
+
+request = {
+    "system": "",
+    "prompt": "Add 2 and 3",
+}
+
+resp = requests.post(f"{url}text-completion", json=request)
+
+if resp.status_code != 200:
+    raise RuntimeError(f"Status code: {resp.status_code}")
+
+result = resp.json()
+
+if "error" in result:
+    print(f"Error: {result['error']}")
+    sys.exit(1)
+
+print(result["response"])
+
+############################################################################
diff --git a/test-api/test-load-document b/test-api/test-load-document
index 77a0dffd..03dfc787 100755
--- a/test-api/test-load-document
+++ b/test-api/test-load-document
@@ -5,7 +5,7 @@ import json
import sys
import base64
-url = "http://localhost:8088/api/v1/"
+url = "http://localhost:8088/api/v1/flow/0000/document-load"
############################################################################
@@ -88,10 +88,7 @@ input = {
}
-resp = requests.post(
- f"{url}load/document",
- json=input,
-)
+resp = requests.post(url, json=input)
resp = resp.json()
diff --git a/test-api/test-load-text b/test-api/test-load-text
index 6865bdd4..ecf3d254 100755
--- a/test-api/test-load-text
+++ b/test-api/test-load-text
@@ -5,7 +5,7 @@ import json
import sys
import base64
-url = "http://localhost:8088/api/v1/"
+url = "http://localhost:8088/api/v1/flow/0000/service/text-load"
############################################################################
@@ -85,10 +85,7 @@ input = {
}
-resp = requests.post(
- f"{url}load/text",
- json=input,
-)
+resp = requests.post(url, json=input)
resp = resp.json()
diff --git a/test-api/test-prompt-api b/test-api/test-prompt-api
index 4f69f09a..8cd6615b 100755
--- a/test-api/test-prompt-api
+++ b/test-api/test-prompt-api
@@ -4,7 +4,7 @@ import requests
import json
import sys
-url = "http://localhost:8088/api/v1/"
+url = "http://localhost:8088/api/v1/flow/0000/prompt"
############################################################################
@@ -16,7 +16,7 @@ input = {
}
resp = requests.post(
- f"{url}prompt",
+ url,
json=input,
)
diff --git a/test-api/test-prompt2-api b/test-api/test-prompt2-api
index 1e641439..8b9c55c7 100755
--- a/test-api/test-prompt2-api
+++ b/test-api/test-prompt2-api
@@ -4,7 +4,7 @@ import requests
import json
import sys
-url = "http://localhost:8088/api/v1/"
+url = "http://localhost:8088/api/v1/flow/0000/prompt"
############################################################################
@@ -16,7 +16,7 @@ input = {
}
resp = requests.post(
- f"{url}prompt",
+ url,
json=input,
)
diff --git a/tests/test-agent b/tests/test-agent
index 4782bbae..b1420098 100755
--- a/tests/test-agent
+++ b/tests/test-agent
@@ -20,7 +20,11 @@ def output(text, prefix="> ", width=78):
)
print(out)
-p = AgentClient(pulsar_host="pulsar://localhost:6650")
+p = AgentClient(
+ pulsar_host="pulsar://pulsar:6650",
+ input_queue = "non-persistent://tg/request/agent:0000",
+ output_queue = "non-persistent://tg/response/agent:0000",
+)
q = "How many cats does Mark have? Calculate that number raised to 0.4 power. Is that number lower than the numeric part of the mission identifier of the Space Shuttle Challenger on its last mission? If so, give me an apple pie recipe, otherwise return a poem about cheese."
diff --git a/tests/test-config b/tests/test-config
new file mode 100644
index 00000000..63f77b6b
--- /dev/null
+++ b/tests/test-config
@@ -0,0 +1,2 @@
+#!/usr/bin/env python3
+
diff --git a/tests/test-doc-rag b/tests/test-doc-rag
index 718157b6..b7382bf5 100755
--- a/tests/test-doc-rag
+++ b/tests/test-doc-rag
@@ -3,7 +3,12 @@
import pulsar
from trustgraph.clients.document_rag_client import DocumentRagClient
-rag = DocumentRagClient(pulsar_host="pulsar://localhost:6650")
+rag = DocumentRagClient(
+ pulsar_host="pulsar://localhost:6650",
+ subscriber="test1",
+ input_queue = "non-persistent://tg/request/document-rag:default",
+ output_queue = "non-persistent://tg/response/document-rag:default",
+)
query="""
What was the cause of the space shuttle disaster?"""
diff --git a/tests/test-embeddings b/tests/test-embeddings
index 3855fcf0..5fcd31e6 100755
--- a/tests/test-embeddings
+++ b/tests/test-embeddings
@@ -3,7 +3,12 @@
import pulsar
from trustgraph.clients.embeddings_client import EmbeddingsClient
-embed = EmbeddingsClient(pulsar_host="pulsar://localhost:6650")
+embed = EmbeddingsClient(
+ pulsar_host="pulsar://pulsar:6650",
+ input_queue="non-persistent://tg/request/embeddings:default",
+ output_queue="non-persistent://tg/response/embeddings:default",
+ subscriber="test1",
+)
prompt="Write a funny limerick about a llama"
@@ -11,5 +16,3 @@ resp = embed.request(prompt)
print(resp)
-
-
diff --git a/tests/test-flow b/tests/test-flow
new file mode 100755
index 00000000..87a349af
--- /dev/null
+++ b/tests/test-flow
@@ -0,0 +1,33 @@
+#!/usr/bin/env python3
+
+"""Exercise the flow-class operations of the flow REST endpoint."""
+
+import requests
+
+# No trailing slash: the endpoint path below supplies its own leading "/".
+url = "http://localhost:8088"
+
+# Sent in order. "bunch" is a scratch class: it is created, read back and
+# then deleted again.
+operations = [
+    {"operation": "list-classes"},
+    {"operation": "get-class", "class-name": "default"},
+    {
+        "operation": "put-class",
+        "class-name": "bunch",
+        "class-definition": "{}",
+    },
+    {"operation": "get-class", "class-name": "bunch"},
+    {"operation": "list-classes"},
+    {"operation": "delete-class", "class-name": "bunch"},
+    {"operation": "list-classes"},
+    {"operation": "list-flows"},
+]
+
+for operation in operations:
+
+    resp = requests.post(f"{url}/api/v1/flow", json=operation)
+
+    # Print the response object and the raw body for manual inspection.
+    print(resp)
+    print(resp.text)
diff --git a/tests/test-flow-get-class b/tests/test-flow-get-class
new file mode 100755
index 00000000..20707b51
--- /dev/null
+++ b/tests/test-flow-get-class
@@ -0,0 +1,18 @@
+#!/usr/bin/env python3
+
+"""Print the definition of the "default" flow class."""
+
+import requests
+
+# No trailing slash: the endpoint path below supplies its own leading "/".
+url = "http://localhost:8088"
+
+resp = requests.post(
+    f"{url}/api/v1/flow",
+    json={
+        "operation": "get-class",
+        "class-name": "default",
+    },
+)
+
+print(resp.json()["class-definition"])
diff --git a/tests/test-flow-put-class b/tests/test-flow-put-class
new file mode 100755
index 00000000..8fd4d9f2
--- /dev/null
+++ b/tests/test-flow-put-class
@@ -0,0 +1,22 @@
+#!/usr/bin/env python3
+
+import requests
+import json
+
+# Base URL has no trailing slash: the request path below starts with "/"
+url = "http://localhost:8088"
+defn = {"class": {"de-query:{class}": {"request": "non-persistent://tg/request/document-embeddings:{class}", "response": "non-persistent://tg/response/document-embeddings:{class}"}, "document-rag:{class}": {"document-embeddings-request": "non-persistent://tg/request/document-embeddings:{class}", "document-embeddings-response": "non-persistent://tg/response/document-embeddings:{class}", "embeddings-request": "non-persistent://tg/request/embeddings:{class}", "embeddings-response": "non-persistent://tg/response/embeddings:{class}", "prompt-request": "non-persistent://tg/request/prompt-rag:{class}", "prompt-response": "non-persistent://tg/response/prompt-rag:{class}", "request": "non-persistent://tg/request/document-rag:{class}", "response": "non-persistent://tg/response/document-rag:{class}"}, "embeddings:{class}": {"request": "non-persistent://tg/request/embeddings:{class}", "response": "non-persistent://tg/response/embeddings:{class}"}, "ge-query:{class}": {"request": "non-persistent://tg/request/graph-embeddings:{class}", "response": "non-persistent://tg/response/graph-embeddings:{class}"}, "graph-rag:{class}": {"embeddings-request": "non-persistent://tg/request/embeddings:{class}", "embeddings-response": "non-persistent://tg/response/embeddings:{class}", "graph-embeddings-request": "non-persistent://tg/request/graph-embeddings:{class}", "graph-embeddings-response": "non-persistent://tg/response/graph-embeddings:{class}", "prompt-request": "non-persistent://tg/request/prompt-rag:{class}", "prompt-response": "non-persistent://tg/response/prompt-rag:{class}", "request": "non-persistent://tg/request/graph-rag:{class}", "response": "non-persistent://tg/response/graph-rag:{class}", "triples-request": "non-persistent://tg/request/triples:{class}", "triples-response": "non-persistent://tg/response/triples:{class}"}, "metering-rag:{class}": {"input": "non-persistent://tg/response/text-completion-rag:{class}"}, "metering:{class}": {"input": 
"non-persistent://tg/response/text-completion:{class}"}, "prompt-rag:{class}": {"request": "non-persistent://tg/request/prompt-rag:{class}", "response": "non-persistent://tg/response/prompt-rag:{class}", "text-completion-request": "non-persistent://tg/request/text-completion-rag:{class}", "text-completion-response": "non-persistent://tg/response/text-completion-rag:{class}"}, "prompt:{class}": {"request": "non-persistent://tg/request/prompt:{class}", "response": "non-persistent://tg/response/prompt:{class}", "text-completion-request": "non-persistent://tg/request/text-completion:{class}", "text-completion-response": "non-persistent://tg/response/text-completion:{class}"}, "text-completion-rag:{class}": {"request": "non-persistent://tg/request/text-completion-rag:{class}", "response": "non-persistent://tg/response/text-completion-rag:{class}"}, "text-completion:{class}": {"request": "non-persistent://tg/request/text-completion:{class}", "response": "non-persistent://tg/response/text-completion:{class}"}, "triples-query:{class}": {"request": "non-persistent://tg/request/triples:{class}", "response": "non-persistent://tg/response/triples:{class}"}}, "description": "Default flow class, supports GraphRAG and document RAG", "flow": {"agent-manager:{id}": {"graph-rag-request": "non-persistent://tg/request/graph-rag:{class}", "graph-rag-response": "non-persistent://tg/response/graph-rag:{class}", "next": "non-persistent://tg/request/agent:{id}", "prompt-request": "non-persistent://tg/request/prompt:{class}", "prompt-response": "non-persistent://tg/response/prompt:{class}", "request": "non-persistent://tg/request/agent:{id}", "response": "non-persistent://tg/response/agent:{id}", "text-completion-request": "non-persistent://tg/request/text-completion:{class}", "text-completion-response": "non-persistent://tg/response/text-completion:{class}"}, "chunker:{id}": {"input": "persistent://tg/flow/text-document-load:{id}", "output": "persistent://tg/flow/chunk-load:{id}"}, 
"de-write:{id}": {"input": "persistent://tg/flow/document-embeddings-store:{id}"}, "document-embeddings:{id}": {"embeddings-request": "non-persistent://tg/request/embeddings:{class}", "embeddings-response": "non-persistent://tg/response/embeddings:{class}", "input": "persistent://tg/flow/chunk-load:{id}", "output": "persistent://tg/flow/document-embeddings-store:{id}"}, "ge-write:{id}": {"input": "persistent://tg/flow/graph-embeddings-store:{id}"}, "graph-embeddings:{id}": {"embeddings-request": "non-persistent://tg/request/embeddings:{class}", "embeddings-response": "non-persistent://tg/response/embeddings:{class}", "input": "persistent://tg/flow/entity-contexts-load:{id}", "output": "persistent://tg/flow/graph-embeddings-store:{id}"}, "kg-extract-definitions:{id}": {"entity-contexts": "persistent://tg/flow/entity-contexts-load:{id}", "input": "persistent://tg/flow/chunk-load:{id}", "prompt-request": "non-persistent://tg/request/prompt:{class}", "prompt-response": "non-persistent://tg/response/prompt:{class}", "triples": "persistent://tg/flow/triples-store:{id}"}, "kg-extract-relationships:{id}": {"input": "persistent://tg/flow/chunk-load:{id}", "prompt-request": "non-persistent://tg/request/prompt:{class}", "prompt-response": "non-persistent://tg/response/prompt:{class}", "triples": "persistent://tg/flow/triples-store:{id}"}, "pdf-decoder:{id}": {"input": "persistent://tg/flow/document-load:{id}", "output": "persistent://tg/flow/text-document-load:{id}"}, "triples-write:{id}": {"input": "persistent://tg/flow/triples-store:{id}"}}, "tags": ["document-rag", "graph-rag", "knowledge-extraction"]}
+
+resp = requests.post(
+ f"{url}/api/v1/flow",
+ json={
+ "operation": "put-class",
+ "class-name": "default",
+ "class-definition": json.dumps(defn),
+ }
+)
+
+resp = resp.json()
+
+print(resp)
+
diff --git a/tests/test-flow-start-flow b/tests/test-flow-start-flow
new file mode 100755
index 00000000..15a3c0cc
--- /dev/null
+++ b/tests/test-flow-start-flow
@@ -0,0 +1,23 @@
+#!/usr/bin/env python3
+
+import requests
+import json
+
+# Base URL has no trailing slash: the request path below starts with "/"
+url = "http://localhost:8088"
+resp = requests.post(
+ f"{url}/api/v1/flow",
+ json={
+ "operation": "start-flow",
+ "flow-id": "0003",
+ "class-name": "default",
+ }
+)
+
+print(resp)
+print(resp.text)
+resp = resp.json()
+
+
+print(resp)
+
diff --git a/tests/test-flow-stop-flow b/tests/test-flow-stop-flow
new file mode 100755
index 00000000..62ea1aa9
--- /dev/null
+++ b/tests/test-flow-stop-flow
@@ -0,0 +1,22 @@
+#!/usr/bin/env python3
+
+import requests
+import json
+
+# Base URL has no trailing slash: the request path below starts with "/"
+url = "http://localhost:8088"
+resp = requests.post(
+ f"{url}/api/v1/flow",
+ json={
+ "operation": "stop-flow",
+ "flow-id": "0003",
+ }
+)
+
+print(resp)
+print(resp.text)
+resp = resp.json()
+
+
+print(resp)
+
diff --git a/tests/test-get-config b/tests/test-get-config
new file mode 100755
index 00000000..ecabe032
--- /dev/null
+++ b/tests/test-get-config
@@ -0,0 +1,11 @@
+#!/usr/bin/env python3
+
+import pulsar
+from trustgraph.clients.config_client import ConfigClient
+
+cli = ConfigClient(pulsar_host="pulsar://localhost:6650")
+
+resp = cli.request_config()
+
+print(resp)
+
diff --git a/tests/test-graph-rag b/tests/test-graph-rag
index 036f73f4..b62f890c 100755
--- a/tests/test-graph-rag
+++ b/tests/test-graph-rag
@@ -3,11 +3,18 @@
import pulsar
from trustgraph.clients.graph_rag_client import GraphRagClient
-rag = GraphRagClient(pulsar_host="pulsar://localhost:6650")
+rag = GraphRagClient(
+ pulsar_host="pulsar://localhost:6650",
+ subscriber="test1",
+ input_queue = "non-persistent://tg/request/graph-rag:default",
+ output_queue = "non-persistent://tg/response/graph-rag:default",
+)
-query="""
-This knowledge graph describes the Space Shuttle disaster.
-Present 20 facts which are present in the knowledge graph."""
+#query="""
+#This knowledge graph describes the Space Shuttle disaster.
+#Present 20 facts which are present in the knowledge graph."""
+
+query = "How many cats does Mark have?"
resp = rag.request(query)
diff --git a/tests/test-llm b/tests/test-llm
index 4e86387a..aaae30a6 100755
--- a/tests/test-llm
+++ b/tests/test-llm
@@ -3,14 +3,17 @@
import pulsar
from trustgraph.clients.llm_client import LlmClient
-llm = LlmClient(pulsar_host="pulsar://localhost:6650")
+llm = LlmClient(
+ pulsar_host="pulsar://pulsar:6650",
+ input_queue="non-persistent://tg/request/text-completion:default",
+ output_queue="non-persistent://tg/response/text-completion:default",
+ subscriber="test1",
+)
system = "You are a lovely assistant."
-prompt="Write a funny limerick about a llama"
+prompt="what is 2 + 2 == 5"
resp = llm.request(system, prompt)
print(resp)
-
-
diff --git a/tests/test-load-pdf b/tests/test-load-pdf
new file mode 100755
index 00000000..838a57ce
--- /dev/null
+++ b/tests/test-load-pdf
@@ -0,0 +1,36 @@
+#!/usr/bin/env python3
+
+import pulsar
+from pulsar.schema import JsonSchema
+import base64
+
+from trustgraph.schema import Document, Metadata
+
+client = pulsar.Client("pulsar://localhost:6650", listener_name="localhost")
+
+prod = client.create_producer(
+ topic="persistent://tg/flow/document-load:0000",
+ schema=JsonSchema(Document),
+ chunking_enabled=True,
+)
+
+path = "../sources/Challenger-Report-Vol1.pdf"
+
+with open(path, "rb") as f:
+ blob = base64.b64encode(f.read()).decode("utf-8")
+
+message = Document(
+ metadata = Metadata(
+ id = "00001",
+ metadata = [],
+ user="trustgraph",
+ collection="default",
+ ),
+ data=blob
+)
+
+prod.send(message)
+
+prod.close()
+client.close()
+
diff --git a/tests/test-load-text b/tests/test-load-text
new file mode 100755
index 00000000..83006c6d
--- /dev/null
+++ b/tests/test-load-text
@@ -0,0 +1,37 @@
+#!/usr/bin/env python3
+
+import pulsar
+from pulsar.schema import JsonSchema
+import base64
+
+from trustgraph.schema import TextDocument, Metadata
+
+client = pulsar.Client("pulsar://localhost:6650", listener_name="localhost")
+
+prod = client.create_producer(
+ topic="persistent://tg/flow/text-document-load:0000",
+ schema=JsonSchema(TextDocument),
+ chunking_enabled=True,
+)
+
+path = "../trustgraph/docs/README.cats"
+
+with open(path, "r") as f:
+# blob = base64.b64encode(f.read()).decode("utf-8")
+ blob = f.read()
+
+message = TextDocument(
+ metadata = Metadata(
+ id = "00001",
+ metadata = [],
+ user="trustgraph",
+ collection="default",
+ ),
+ text=blob
+)
+
+prod.send(message)
+
+prod.close()
+client.close()
+
diff --git a/tests/test-prompt-extraction b/tests/test-prompt-extraction
index c73bd2e2..20aaaf50 100755
--- a/tests/test-prompt-extraction
+++ b/tests/test-prompt-extraction
@@ -3,7 +3,12 @@
import json
from trustgraph.clients.prompt_client import PromptClient
-p = PromptClient(pulsar_host="pulsar://localhost:6650")
+p = PromptClient(
+ pulsar_host="pulsar://localhost:6650",
+ input_queue="non-persistent://tg/request/prompt:default",
+ output_queue="non-persistent://tg/response/prompt:default",
+ subscriber="test1",
+)
chunk="""
The Space Shuttle was a reusable spacecraft that transported astronauts and cargo to and from Earth's orbit. It was designed to launch like a rocket, maneuver in orbit like a spacecraft, and land like an airplane. The Space Shuttle was NASA's space transportation system and was used for many purposes, including:
@@ -31,8 +36,8 @@ The Space Shuttle's last mission was in 2011.
q = "Tell me some facts in the knowledge graph"
resp = p.request(
- id="extract-definition",
- terms = {
+ id="extract-definitions",
+ variables = {
"text": chunk,
}
)
@@ -40,7 +45,7 @@ resp = p.request(
print(resp)
for fact in resp:
- print(fact["term"], "::")
+ print(fact["entity"], "::")
print(fact["definition"])
print()
diff --git a/tests/test-prompt-question b/tests/test-prompt-question
index 50660965..78ba72aa 100755
--- a/tests/test-prompt-question
+++ b/tests/test-prompt-question
@@ -3,13 +3,18 @@
import pulsar
from trustgraph.clients.prompt_client import PromptClient
-p = PromptClient(pulsar_host="pulsar://localhost:6650")
+p = PromptClient(
+ pulsar_host="pulsar://localhost:6650",
+ input_queue="non-persistent://tg/request/prompt:default",
+ output_queue="non-persistent://tg/response/prompt:default",
+ subscriber="test1",
+)
question = """What is the square root of 16?"""
resp = p.request(
id="question",
- terms = {
+ variables = {
"question": question
}
)
diff --git a/tests/test-triples b/tests/test-triples
index 05263d0d..e804d844 100755
--- a/tests/test-triples
+++ b/tests/test-triples
@@ -3,7 +3,9 @@
import pulsar
from trustgraph.clients.triples_query_client import TriplesQueryClient
-tq = TriplesQueryClient(pulsar_host="pulsar://localhost:6650")
+tq = TriplesQueryClient(
+ pulsar_host="pulsar://localhost:6650",
+)
e = "http://trustgraph.ai/e/shuttle"
diff --git a/trustgraph-base/trustgraph/api/api.py b/trustgraph-base/trustgraph/api/api.py
index 4c72d3ca..73adc7a3 100644
--- a/trustgraph-base/trustgraph/api/api.py
+++ b/trustgraph-base/trustgraph/api/api.py
@@ -1,26 +1,31 @@
import requests
import json
-import dataclasses
import base64
+import time
-from trustgraph.knowledge import hash, Uri, Literal
+from . library import Library
+from . flow import Flow
+from . config import Config
+from . knowledge import Knowledge
+from . exceptions import *
+from . types import *
-class ProtocolException(Exception):
- pass
+def check_error(response):
-class ApplicationException(Exception):
- pass
+ if "error" in response:
-@dataclasses.dataclass
-class Triple:
- s : str
- p : str
- o : str
+ try:
+ msg = response["error"]["message"]
+ tp = response["error"]["type"]
+ except:
+ raise ApplicationException(response["error"])
+
+ raise ApplicationException(f"{tp}: {msg}")
class Api:
- def __init__(self, url="http://localhost:8088/"):
+ def __init__(self, url="http://localhost:8088/", timeout=60):
self.url = url
@@ -29,356 +34,42 @@ class Api:
self.url += "api/v1/"
- def check_error(self, response):
+ self.timeout = timeout
- if "error" in response:
+ def flow(self):
+ return Flow(api=self)
- try:
- msg = response["error"]["message"]
- tp = response["error"]["message"]
- except:
- raise ApplicationException(
- "Error, but the error object is broken"
- )
+ def config(self):
+ return Config(api=self)
- raise ApplicationException(f"{tp}: {msg}")
+ def knowledge(self):
+ return Knowledge(api=self)
- def text_completion(self, system, prompt):
+ def request(self, path, request):
- # The input consists of system and prompt strings
- input = {
- "system": system,
- "prompt": prompt
- }
+ url = f"{self.url}{path}"
- url = f"{self.url}text-completion"
+# print("uri:", url)
+# print(json.dumps(request, indent=4))
# Invoke the API, input is passed as JSON
- resp = requests.post(url, json=input)
+ resp = requests.post(url, json=request, timeout=self.timeout)
# Should be a 200 status code
if resp.status_code != 200:
raise ProtocolException(f"Status code {resp.status_code}")
+# print(resp.text)
+
try:
# Parse the response as JSON
object = resp.json()
except:
raise ProtocolException(f"Expected JSON response")
- self.check_error(resp)
+ check_error(object)
- try:
- return object["response"]
- except:
- raise ProtocolException(f"Response not formatted correctly")
-
- def agent(self, question):
-
- # The input consists of a question
- input = {
- "question": question
- }
-
- url = f"{self.url}agent"
-
- # Invoke the API, input is passed as JSON
- resp = requests.post(url, json=input)
-
- # Should be a 200 status code
- if resp.status_code != 200:
- raise ProtocolException(f"Status code {resp.status_code}")
-
- try:
- # Parse the response as JSON
- object = resp.json()
- except:
- raise ProtocolException(f"Expected JSON response")
-
- self.check_error(resp)
-
- try:
- return object["answer"]
- except:
- raise ProtocolException(f"Response not formatted correctly")
-
- def graph_rag(
- self, question, user="trustgraph", collection="default",
- entity_limit=50, triple_limit=30, max_subgraph_size=150,
- max_path_length=2,
- ):
-
- # The input consists of a question
- input = {
- "query": question,
- "user": user,
- "collection": collection,
- "entity-limit": entity_limit,
- "triple-limit": triple_limit,
- "max-subgraph-size": max_subgraph_size,
- "max-path-length": max_path_length,
- }
-
- url = f"{self.url}graph-rag"
-
- # Invoke the API, input is passed as JSON
- resp = requests.post(url, json=input)
-
- # Should be a 200 status code
- if resp.status_code != 200:
- raise ProtocolException(f"Status code {resp.status_code}")
-
- try:
- # Parse the response as JSON
- object = resp.json()
- except:
- raise ProtocolException(f"Expected JSON response")
-
- self.check_error(resp)
-
- try:
- return object["response"]
- except:
- raise ProtocolException(f"Response not formatted correctly")
-
- def document_rag(
- self, question, user="trustgraph", collection="default",
- doc_limit=10,
- ):
-
- # The input consists of a question
- input = {
- "query": question,
- "user": user,
- "collection": collection,
- "doc-limit": doc_limit,
- }
-
- url = f"{self.url}document-rag"
-
- # Invoke the API, input is passed as JSON
- resp = requests.post(url, json=input)
-
- # Should be a 200 status code
- if resp.status_code != 200:
- raise ProtocolException(f"Status code {resp.status_code}")
-
- try:
- # Parse the response as JSON
- object = resp.json()
- except:
- raise ProtocolException(f"Expected JSON response")
-
- self.check_error(resp)
-
- try:
- return object["response"]
- except:
- raise ProtocolException(f"Response not formatted correctly")
-
- def embeddings(self, text):
-
- # The input consists of a text block
- input = {
- "text": text
- }
-
- url = f"{self.url}embeddings"
-
- # Invoke the API, input is passed as JSON
- resp = requests.post(url, json=input)
-
- # Should be a 200 status code
- if resp.status_code != 200:
- raise ProtocolException(f"Status code {resp.status_code}")
-
- try:
- # Parse the response as JSON
- object = resp.json()
- except:
- raise ProtocolException(f"Expected JSON response")
-
- self.check_error(resp)
-
- try:
- return object["vectors"]
- except:
- raise ProtocolException(f"Response not formatted correctly")
-
- def prompt(self, id, variables):
-
- # The input consists of system and prompt strings
- input = {
- "id": id,
- "variables": variables
- }
-
- url = f"{self.url}prompt"
-
- # Invoke the API, input is passed as JSON
- resp = requests.post(url, json=input)
-
- # Should be a 200 status code
- if resp.status_code != 200:
- raise ProtocolException(f"Status code {resp.status_code}")
-
- try:
- # Parse the response as JSON
- object = resp.json()
- except:
- raise ProtocolException("Expected JSON response")
-
- self.check_error(resp)
-
- if "text" in object:
- return object["text"]
-
- if "object" in object:
- try:
- return json.loads(object["object"])
- except Exception as e:
- raise ProtocolException(
- "Returned object not well-formed JSON"
- )
-
- raise ProtocolException("Response not formatted correctly")
-
- def triples_query(self, s=None, p=None, o=None, limit=10000):
-
- # The input consists of system and prompt strings
- input = {
- "limit": limit
- }
-
- if s:
- if not isinstance(s, Uri):
- raise RuntimeError("s must be Uri")
- input["s"] = { "v": str(s), "e": isinstance(s, Uri), }
-
- if p:
- if not isinstance(p, Uri):
- raise RuntimeError("p must be Uri")
- input["p"] = { "v": str(p), "e": isinstance(p, Uri), }
-
- if o:
- if not isinstance(o, Uri) and not isinstance(o, Literal):
- raise RuntimeError("o must be Uri or Literal")
- input["o"] = { "v": str(o), "e": isinstance(o, Uri), }
-
- url = f"{self.url}triples-query"
-
- # Invoke the API, input is passed as JSON
- resp = requests.post(url, json=input)
-
- # Should be a 200 status code
- if resp.status_code != 200:
- raise ProtocolException(f"Status code {resp.status_code}")
-
- try:
- # Parse the response as JSON
- object = resp.json()
- except:
- raise ProtocolException("Expected JSON response")
-
- self.check_error(resp)
-
- if "response" not in object:
- raise ProtocolException("Response not formatted correctly")
-
- def to_value(x):
- if x["e"]: return Uri(x["v"])
- return Literal(x["v"])
-
- return [
- Triple(
- s=to_value(t["s"]),
- p=to_value(t["p"]),
- o=to_value(t["o"])
- )
- for t in object["response"]
- ]
-
- return object["response"]
-
- def load_document(self, document, id=None, metadata=None):
-
- if id is None:
-
- if metadata is not None:
-
- # Situation makes no sense. What can the metadata possibly
- # mean if the caller doesn't know the document ID.
- # Metadata should relate to the document by ID
- raise RuntimeError("Can't specify metadata without id")
-
- id = hash(document)
-
- triples = []
-
- def emit(t):
- triples.append(t)
-
- if metadata:
- metadata.emit(
- lambda t: triples.append({
- "s": { "v": t["s"], "e": isinstance(t["s"], Uri) },
- "p": { "v": t["p"], "e": isinstance(t["p"], Uri) },
- "o": { "v": t["o"], "e": isinstance(t["o"], Uri) }
- })
- )
-
- input = {
- "id": id,
- "metadata": triples,
- "data": base64.b64encode(document).decode("utf-8"),
- }
-
- url = f"{self.url}load/document"
-
- # Invoke the API, input is passed as JSON
- resp = requests.post(url, json=input)
-
- # Should be a 200 status code
- if resp.status_code != 200:
- raise ProtocolException(f"Status code {resp.status_code}")
-
- def load_text(self, text, id=None, metadata=None, charset="utf-8"):
-
- if id is None:
-
- if metadata is not None:
-
- # Situation makes no sense. What can the metadata possibly
- # mean if the caller doesn't know the document ID.
- # Metadata should relate to the document by ID
- raise RuntimeError("Can't specify metadata without id")
-
- id = hash(text)
-
- triples = []
-
- if metadata:
- metadata.emit(
- lambda t: triples.append({
- "s": { "v": t["s"], "e": isinstance(t["s"], Uri) },
- "p": { "v": t["p"], "e": isinstance(t["p"], Uri) },
- "o": { "v": t["o"], "e": isinstance(t["o"], Uri) }
- })
- )
-
- input = {
- "id": id,
- "metadata": triples,
- "charset": charset,
- "text": base64.b64encode(text).decode("utf-8"),
- }
-
- url = f"{self.url}load/text"
-
- # Invoke the API, input is passed as JSON
- resp = requests.post(url, json=input)
-
- # Should be a 200 status code
- if resp.status_code != 200:
- raise ProtocolException(f"Status code {resp.status_code}")
+ return object
+ def library(self):
+ return Library(self)
diff --git a/trustgraph-base/trustgraph/api/config.py b/trustgraph-base/trustgraph/api/config.py
new file mode 100644
index 00000000..7af6ab45
--- /dev/null
+++ b/trustgraph-base/trustgraph/api/config.py
@@ -0,0 +1,97 @@
+
+from . exceptions import *
+from . types import ConfigValue
+
+class Config:
+    """Client for the "config" endpoint, reached through `api.request`."""
+    def __init__(self, api):
+        self.api = api
+
+    def request(self, request):
+        return self.api.request("config", request)
+
+    def get(self, keys):
+        """Fetch the config values for `keys` as a list of ConfigValue.
+
+        Each key must expose `.type` and `.key` attributes.  Raises
+        ProtocolException if the reply lacks a usable "values" list.
+        """
+        input = {
+            "operation": "get",
+            "keys": [
+                { "type": k.type, "key": k.key }
+                for k in keys
+            ]
+        }
+
+        object = self.request(input)
+
+        try:
+            return [
+                ConfigValue(
+                    type = v["type"], key = v["key"], value = v["value"]
+                )
+                for v in object["values"]
+            ]
+        except Exception as e:
+            raise ProtocolException("Response not formatted correctly") from e
+
+    def put(self, values):
+        """Send a "put" operation; items must expose .type, .key, .value."""
+        # One {"type", "key", "value"} entry is sent per item
+        input = {
+            "operation": "put",
+            "values": [
+                { "type": v.type, "key": v.key, "value": v.value }
+                for v in values
+            ]
+        }
+
+        self.request(input)
+
+    def list(self, type):
+        """Return the "directory" field of the "list" reply for `type`."""
+        # NOTE(review): presumably the key names held under `type` - confirm
+        input = {
+            "operation": "list",
+            "type": type,
+        }
+
+        return self.request(input)["directory"]
+
+    def get_values(self, type):
+        """Return every config entry of `type` as a list of ConfigValue.
+
+        Raises ProtocolException if the reply lacks a usable "values" list.
+        """
+        input = {
+            "operation": "getvalues",
+            "type": type,
+        }
+
+        # "values" is read from the top level of the reply, as in get();
+        # NOTE(review): assumes "getvalues" replies mirror "get" - confirm.
+        object = self.request(input)
+
+        try:
+            return [
+                ConfigValue(type=v["type"], key=v["key"], value=v["value"])
+                for v in object["values"]
+            ]
+        except Exception as e:
+            raise ProtocolException("Response not formatted correctly") from e
+
+    def all(self):
+        """Return `(config, version)` from the reply to a "config" request."""
+        input = {
+            "operation": "config"
+        }
+
+        object = self.request(input)
+
+        try:
+            return object["config"], object["version"]
+        except Exception as e:
+            # A reply without both fields is a protocol violation
+            raise ProtocolException("Response not formatted correctly") from e
+
diff --git a/trustgraph-base/trustgraph/api/exceptions.py b/trustgraph-base/trustgraph/api/exceptions.py
new file mode 100644
index 00000000..b3f732d4
--- /dev/null
+++ b/trustgraph-base/trustgraph/api/exceptions.py
@@ -0,0 +1,6 @@
+
+class ProtocolException(Exception):
+    """Raised when a reply has a bad status, is not JSON, or is malformed."""
+
+class ApplicationException(Exception):
+    """Raised when a parsed reply carries an "error" field."""
diff --git a/trustgraph-base/trustgraph/api/flow.py b/trustgraph-base/trustgraph/api/flow.py
new file mode 100644
index 00000000..ba330ab7
--- /dev/null
+++ b/trustgraph-base/trustgraph/api/flow.py
@@ -0,0 +1,360 @@
+import json
+import base64
+
+from .. knowledge import hash, Uri, Literal
+from . types import Triple
+from . exceptions import ProtocolException
+
+def to_value(x):
+    # x["e"] picks the wrapper for x["v"]: truthy -> Uri, falsy -> Literal
+    return Uri(x["v"]) if x["e"] else Literal(x["v"])
+
+class Flow:
+    """Client for the "flow" endpoint: flow classes and flow instances."""
+    def __init__(self, api):
+        self.api = api
+
+    def request(self, path=None, request=None):
+        """Send `request` to "flow", or to "flow/<path>" when `path` is set."""
+        if request is None:
+            raise RuntimeError("request must be specified")
+
+        if path:
+            return self.api.request(f"flow/{path}", request)
+        else:
+            return self.api.request(f"flow", request)
+
+    def id(self, id="0000"):
+        return FlowInstance(api=self, id=id)
+
+    def list_classes(self):
+        """Return the "class-names" field of the "list-classes" reply."""
+        # This operation takes no arguments
+        input = {
+            "operation": "list-classes",
+        }
+
+        return self.request(request = input)["class-names"]
+
+    def get_class(self, class_name):
+        """Return the JSON-decoded "class-definition" for `class_name`."""
+        # The reply holds the definition as a JSON string
+        input = {
+            "operation": "get-class",
+            "class-name": class_name,
+        }
+
+        return json.loads(self.request(request = input)["class-definition"])
+
+    def put_class(self, class_name, definition):
+        """Issue "put-class" for `class_name` with `definition`."""
+        # The definition is sent as a JSON string
+        input = {
+            "operation": "put-class",
+            "class-name": class_name,
+            "class-definition": json.dumps(definition),
+        }
+
+        self.request(request = input)
+
+    def delete_class(self, class_name):
+        """Issue "delete-class" for `class_name`; the reply is discarded."""
+        # Only the class name is sent
+        input = {
+            "operation": "delete-class",
+            "class-name": class_name,
+        }
+
+        self.request(request = input)
+
+    def list(self):
+        """Return the "flow-ids" field of the "list-flows" reply."""
+        # This operation takes no arguments
+        input = {
+            "operation": "list-flows",
+        }
+
+        return self.request(request = input)["flow-ids"]
+
+    def get(self, id):
+        """Return the JSON-decoded "flow" field of the "get-flow" reply."""
+        # The reply holds the flow as a JSON string
+        input = {
+            "operation": "get-flow",
+            "flow-id": id,
+        }
+
+        return json.loads(self.request(request = input)["flow"])
+
+    def start(self, class_name, id, description):
+        """Issue "start-flow" for flow `id` using flow class `class_name`."""
+        # The description is passed through unchanged
+        input = {
+            "operation": "start-flow",
+            "flow-id": id,
+            "class-name": class_name,
+            "description": description,
+        }
+
+        self.request(request = input)
+
+    def stop(self, id):
+        """Issue "stop-flow" for flow `id`; the reply is discarded."""
+        # Only the flow id is sent
+        input = {
+            "operation": "stop-flow",
+            "flow-id": id,
+        }
+
+        self.request(request = input)
+
+class FlowInstance:
+    """Service calls scoped to one flow id; paths become "<id>/<path>"."""
+    def __init__(self, api, id):
+        self.api = api
+        self.id = id
+
+    def request(self, path, request):
+        """Forward `request` via `self.api`, prefixing `path` with the id."""
+        return self.api.request(path = f"{self.id}/{path}", request = request)
+
+    def text_completion(self, system, prompt):
+        """Call "service/text-completion"; return the reply's "response"."""
+        # The input consists of system and prompt strings
+        input = {
+            "system": system,
+            "prompt": prompt
+        }
+
+        return self.request(
+            "service/text-completion",
+            input
+        )["response"]
+
+    def agent(self, question):
+        """Call "service/agent"; return the reply's "answer" field."""
+        # The input consists of a question
+        input = {
+            "question": question
+        }
+
+        return self.request(
+            "service/agent",
+            input
+        )["answer"]
+
+    def graph_rag(
+            self, question, user="trustgraph", collection="default",
+            entity_limit=50, triple_limit=30, max_subgraph_size=150,
+            max_path_length=2,
+    ):
+        """Call "service/graph-rag"; return the reply's "response" field."""
+        # The question plus user, collection and the four size limits
+        input = {
+            "query": question,
+            "user": user,
+            "collection": collection,
+            "entity-limit": entity_limit,
+            "triple-limit": triple_limit,
+            "max-subgraph-size": max_subgraph_size,
+            "max-path-length": max_path_length,
+        }
+
+        return self.request(
+            "service/graph-rag",
+            input
+        )["response"]
+
+    def document_rag(
+            self, question, user="trustgraph", collection="default",
+            doc_limit=10,
+    ):
+        """Call "service/document-rag"; return the reply's "response" field."""
+        # The question plus user, collection and document limit
+        input = {
+            "query": question,
+            "user": user,
+            "collection": collection,
+            "doc-limit": doc_limit,
+        }
+
+        return self.request(
+            "service/document-rag",
+            input
+        )["response"]
+
+    def embeddings(self, text):
+        """Call "service/embeddings"; return the reply's "vectors" field."""
+        # The input consists of a text block
+        input = {
+            "text": text
+        }
+
+        return self.request(
+            "service/embeddings",
+            input
+        )["vectors"]
+
+    def prompt(self, id, variables):
+        """Invoke prompt `id` with `variables`; return text or decoded object.
+
+        Raises ProtocolException if the reply has neither a "text" field
+        nor an "object" field holding well-formed JSON.
+        """
+        input = {"id": id, "variables": variables}
+
+        object = self.request("service/prompt", input)
+
+        # Plain-text replies are returned as-is
+        if "text" in object:
+            return object["text"]
+
+        # Structured replies arrive as a JSON-encoded string
+        if "object" in object:
+            try:
+                return json.loads(object["object"])
+            except Exception as e:
+                # Keep the decode failure attached as __cause__
+                raise ProtocolException(
+                    "Returned object not well-formed JSON"
+                ) from e
+
+        raise ProtocolException("Response not formatted correctly")
+
+    def triples_query(
+            self, s=None, p=None, o=None,
+            user=None, collection=None, limit=10000
+    ):
+        """Query "service/triples"; return the matches as Triple objects."""
+        # Start from the result limit; every other field is optional
+        input = {
+            "limit": limit
+        }
+
+        if user:
+            input["user"] = user
+
+        if collection:
+            input["collection"] = collection
+
+        if s:
+            if not isinstance(s, Uri):
+                raise RuntimeError("s must be Uri")
+            input["s"] = { "v": str(s), "e": isinstance(s, Uri), }
+
+        if p:
+            if not isinstance(p, Uri):
+                raise RuntimeError("p must be Uri")
+            input["p"] = { "v": str(p), "e": isinstance(p, Uri), }
+
+        if o:
+            if not isinstance(o, Uri) and not isinstance(o, Literal):
+                raise RuntimeError("o must be Uri or Literal")
+            input["o"] = { "v": str(o), "e": isinstance(o, Uri), }
+
+        object = self.request(
+            "service/triples",
+            input
+        )
+
+        return [
+            Triple(
+                s=to_value(t["s"]),
+                p=to_value(t["p"]),
+                o=to_value(t["o"])
+            )
+            for t in object["response"]
+        ]
+
+    def load_document(
+            self, document, id=None, metadata=None, user=None,
+            collection=None,
+    ):
+        """Send bytes-like `document` to "service/document-load" as base64.
+
+        `id` defaults to `hash(document)`; `metadata` requires an explicit id.
+        """
+        if id is None:
+
+            if metadata is not None:
+
+                # Situation makes no sense.  What can the metadata possibly
+                # mean if the caller doesn't know the document ID.
+                # Metadata should relate to the document by ID
+                raise RuntimeError("Can't specify metadata without id")
+
+            id = hash(document)
+
+        triples = []
+
+        if metadata:
+            metadata.emit(
+                lambda t: triples.append({
+                    "s": { "v": t["s"], "e": isinstance(t["s"], Uri) },
+                    "p": { "v": t["p"], "e": isinstance(t["p"], Uri) },
+                    "o": { "v": t["o"], "e": isinstance(t["o"], Uri) }
+                })
+            )
+
+        input = {
+            "id": id,
+            "metadata": triples,
+            "data": base64.b64encode(document).decode("utf-8"),
+        }
+
+        if user:
+            input["user"] = user
+
+        if collection:
+            input["collection"] = collection
+
+        return self.request(
+            "service/document-load",
+            input
+        )
+
+    def load_text(
+            self, text, id=None, metadata=None, charset="utf-8",
+            user=None, collection=None,
+    ):
+        """Send bytes-like `text`, base64-encoded, to "service/text-load"."""
+        if id is None:
+
+            if metadata is not None:
+
+                # Situation makes no sense.  What can the metadata possibly
+                # mean if the caller doesn't know the document ID.
+                # Metadata should relate to the document by ID
+                raise RuntimeError("Can't specify metadata without id")
+
+            id = hash(text)
+
+        triples = []
+
+        if metadata:
+            metadata.emit(
+                lambda t: triples.append({
+                    "s": { "v": t["s"], "e": isinstance(t["s"], Uri) },
+                    "p": { "v": t["p"], "e": isinstance(t["p"], Uri) },
+                    "o": { "v": t["o"], "e": isinstance(t["o"], Uri) }
+                })
+            )
+
+        input = {
+            "id": id,
+            "metadata": triples,
+            "charset": charset,
+            "text": base64.b64encode(text).decode("utf-8"),
+        }
+
+        if user:
+            input["user"] = user
+
+        if collection:
+            input["collection"] = collection
+
+        return self.request(
+            "service/text-load",
+            input
+        )
+
diff --git a/trustgraph-base/trustgraph/api/knowledge.py b/trustgraph-base/trustgraph/api/knowledge.py
new file mode 100644
index 00000000..724ac79c
--- /dev/null
+++ b/trustgraph-base/trustgraph/api/knowledge.py
@@ -0,0 +1,67 @@
+
+import json
+import base64
+
+from .. knowledge import hash, Uri, Literal
+from . types import Triple
+
+def to_value(x):
+    """
+    Convert a wire-format term into a knowledge value: the "v" field is
+    wrapped in a Uri when the "e" flag is truthy, otherwise in a Literal.
+    """
+    wrapper = Uri if x["e"] else Literal
+    return wrapper(x["v"])
+
+class Knowledge:
+    """
+    Client for knowledge-core operations.  Every call is a dict with an
+    "operation" field, sent to the "knowledge" endpoint of the API object
+    supplied at construction.
+    """
+
+    def __init__(self, api):
+        # api must provide request(path, request)
+        self.api = api
+
+    def request(self, request):
+        """Send a request dict to the "knowledge" endpoint."""
+        return self.api.request("knowledge", request)
+
+    def list_kg_cores(self, user="trustgraph"):
+        """Return the "ids" field of the list-kg-cores response."""
+
+        input = {
+            "operation": "list-kg-cores",
+            "user": user,
+        }
+
+        return self.request(request = input)["ids"]
+
+    def delete_kg_core(self, id, user="trustgraph"):
+        """Issue a delete-kg-core operation for the given core ID."""
+
+        input = {
+            "operation": "delete-kg-core",
+            "user": user,
+            "id": id,
+        }
+
+        self.request(request = input)
+
+    def load_kg_core(self, id, user="trustgraph", flow="0000",
+                     collection="default"):
+        """Issue a load-kg-core operation for a core, flow and collection."""
+
+        input = {
+            "operation": "load-kg-core",
+            "user": user,
+            "id": id,
+            "flow": flow,
+            "collection": collection,
+        }
+
+        self.request(request = input)
+
+    def unload_kg_core(self, id, user="trustgraph", flow="0000"):
+        """Issue an unload-kg-core operation for a core and flow."""
+
+        input = {
+            "operation": "unload-kg-core",
+            "user": user,
+            "id": id,
+            "flow": flow,
+        }
+
+        self.request(request = input)
+
diff --git a/trustgraph-base/trustgraph/api/library.py b/trustgraph-base/trustgraph/api/library.py
new file mode 100644
index 00000000..c67bfeda
--- /dev/null
+++ b/trustgraph-base/trustgraph/api/library.py
@@ -0,0 +1,271 @@
+
+import datetime
+import time
+import base64
+
+from . types import DocumentMetadata, ProcessingMetadata, Triple
+from .. knowledge import hash, Uri, Literal
+from . exceptions import *
+
+def to_value(x):
+    """
+    Convert a wire-format term into a knowledge value: the "v" field is
+    wrapped in a Uri when the "e" flag is truthy, otherwise in a Literal.
+    """
+    wrapper = Uri if x["e"] else Literal
+    return wrapper(x["v"])
+
+class Library:
+    """
+    Client for the "librarian" endpoint: adds, lists, fetches, updates
+    and removes documents, and manages processing records.
+    """
+
+    def __init__(self, api):
+        # api must provide request(path, request)
+        self.api = api
+
+    def request(self, request):
+        """Send a request dict to the "librarian" endpoint."""
+        return self.api.request("librarian", request)
+
+    @staticmethod
+    def _term(value):
+        # Wire form of one term: the value plus a flag saying whether it
+        # is a Uri
+        return {"v": value, "e": isinstance(value, Uri)}
+
+    @classmethod
+    def _triple_to_wire(cls, t):
+        # Accepts both subscriptable triples (t["s"]) and objects with
+        # s/p/o attributes such as the Triple dataclass
+        if hasattr(t, "__getitem__"):
+            s, p, o = t["s"], t["p"], t["o"]
+        else:
+            s, p, o = t.s, t.p, t.o
+        return {"s": cls._term(s), "p": cls._term(p), "o": cls._term(o)}
+
+    @staticmethod
+    def _to_document_metadata(doc):
+        # Builds a DocumentMetadata from one response entry; a missing
+        # "comments" field defaults to the empty string
+        return DocumentMetadata(
+            id = doc["id"],
+            time = datetime.datetime.fromtimestamp(doc["time"]),
+            kind = doc["kind"],
+            title = doc["title"],
+            comments = doc.get("comments", ""),
+            metadata = [
+                Triple(
+                    s = to_value(w["s"]),
+                    p = to_value(w["p"]),
+                    o = to_value(w["o"])
+                )
+                for w in doc["metadata"]
+            ],
+            user = doc["user"],
+            tags = doc["tags"]
+        )
+
+    def add_document(
+            self, document, id, metadata, user, title, comments,
+            kind="text/plain", tags=None,
+    ):
+        """
+        Add a document to the library.
+
+        document is passed to base64.b64encode, so it must be bytes-like.
+        metadata may be None, a list of triples, or an object with an
+        emit(callback) method.  When id is None it is derived by calling
+        hash() on the document, and metadata must then be None.
+
+        Raises RuntimeError if metadata is given without an id, or if
+        metadata is neither a list nor an emitter.  Returns the response
+        from the librarian request.
+        """
+
+        if id is None:
+
+            if metadata is not None:
+
+                # Situation makes no sense.  What can the metadata possibly
+                # mean if the caller doesn't know the document ID.
+                # Metadata should relate to the document by ID
+                raise RuntimeError("Can't specify metadata without id")
+
+            id = hash(document)
+
+        if not title: title = ""
+        if not comments: comments = ""
+
+        # A fresh list per call; a shared mutable default would leak
+        # state between calls
+        if tags is None:
+            tags = []
+
+        triples = []
+
+        if metadata:
+            if isinstance(metadata, list):
+                triples = [self._triple_to_wire(t) for t in metadata]
+            elif hasattr(metadata, "emit"):
+                metadata.emit(
+                    lambda t: triples.append(self._triple_to_wire(t))
+                )
+            else:
+                raise RuntimeError("metadata should be a list of Triples or have an emit method")
+
+        input = {
+            "operation": "add-document",
+            "document-metadata": {
+                "id": id,
+                "time": int(time.time()),
+                "kind": kind,
+                "title": title,
+                "comments": comments,
+                "metadata": triples,
+                "user": user,
+                "tags": tags
+            },
+            "content": base64.b64encode(document).decode("utf-8"),
+        }
+
+        return self.request(input)
+
+    def get_documents(self, user):
+        """
+        List a user's documents as DocumentMetadata objects.  Raises
+        ProtocolException if the response cannot be decoded.
+        """
+
+        input = {
+            "operation": "list-documents",
+            "user": user,
+        }
+
+        response = self.request(input)
+
+        try:
+            return [
+                self._to_document_metadata(v)
+                for v in response["document-metadatas"]
+            ]
+        except Exception as e:
+            print(e)
+            raise ProtocolException("Response not formatted correctly") from e
+
+    def get_document(self, user, id):
+        """
+        Fetch one document's metadata as a DocumentMetadata.  Raises
+        ProtocolException if the response cannot be decoded.
+        """
+
+        input = {
+            "operation": "get-document",
+            "user": user,
+            "document-id": id,
+        }
+
+        response = self.request(input)
+
+        try:
+            return self._to_document_metadata(response["document-metadata"])
+        except Exception as e:
+            print(e)
+            raise ProtocolException("Response not formatted correctly") from e
+
+    def update_document(self, user, id, metadata):
+        """
+        Update a document from a DocumentMetadata-like object (time,
+        title, comments, metadata and tags attributes are read) and
+        return the DocumentMetadata decoded from the response.  Raises
+        ProtocolException if the response cannot be decoded.
+        """
+
+        # The other operations exchange time as an integer epoch value,
+        # so a datetime is converted to match
+        when = metadata.time
+        if isinstance(when, datetime.datetime):
+            when = int(when.timestamp())
+
+        input = {
+            "operation": "update-document",
+            "document-metadata": {
+                "user": user,
+                "document-id": id,
+                "time": when,
+                "title": metadata.title,
+                "comments": metadata.comments,
+                "metadata": [
+                    self._triple_to_wire(t)
+                    for t in metadata.metadata
+                ],
+                "tags": metadata.tags,
+            }
+        }
+
+        response = self.request(input)
+
+        try:
+            return self._to_document_metadata(response["document-metadata"])
+        except Exception as e:
+            print(e)
+            raise ProtocolException("Response not formatted correctly") from e
+
+    def remove_document(self, user, id):
+        """Remove a document; the response is discarded, returns {}."""
+
+        input = {
+            "operation": "remove-document",
+            "user": user,
+            "document-id": id,
+        }
+
+        self.request(input)
+
+        return {}
+
+    def start_processing(
+            self, id, document_id, flow="0000",
+            user="trustgraph", collection="default", tags=None,
+    ):
+        """
+        Add a processing record for a document, stamped with the current
+        time.  The response is discarded; returns {}.
+        """
+
+        if tags is None:
+            tags = []
+
+        input = {
+            "operation": "add-processing",
+            "processing-metadata": {
+                "id": id,
+                "document-id": document_id,
+                "time": int(time.time()),
+                "flow": flow,
+                "user": user,
+                "collection": collection,
+                "tags": tags,
+            }
+        }
+
+        self.request(input)
+
+        return {}
+
+    def stop_processing(
+            self, id, user="trustgraph",
+    ):
+        """Remove a processing record; the response is discarded, returns {}."""
+
+        input = {
+            "operation": "remove-processing",
+            "processing-id": id,
+            "user": user,
+        }
+
+        self.request(input)
+
+        return {}
+
+    def get_processings(self, user="trustgraph"):
+        """
+        List a user's processing records as ProcessingMetadata objects.
+        Raises ProtocolException if the response cannot be decoded.
+        """
+
+        input = {
+            "operation": "list-processing",
+            "user": user,
+        }
+
+        response = self.request(input)
+
+        try:
+            return [
+                ProcessingMetadata(
+                    id = v["id"],
+                    document_id = v["document-id"],
+                    time = datetime.datetime.fromtimestamp(v["time"]),
+                    flow = v["flow"],
+                    user = v["user"],
+                    collection = v["collection"],
+                    tags = v["tags"],
+                )
+                for v in response["processing-metadatas"]
+            ]
+        except Exception as e:
+            print(e)
+            raise ProtocolException("Response not formatted correctly") from e
+
diff --git a/trustgraph-base/trustgraph/api/types.py b/trustgraph-base/trustgraph/api/types.py
new file mode 100644
index 00000000..fe3472b1
--- /dev/null
+++ b/trustgraph-base/trustgraph/api/types.py
@@ -0,0 +1,43 @@
+
+import dataclasses
+import datetime
+from typing import List
+from .. knowledge import hash, Uri, Literal
+
+@dataclasses.dataclass
+class Triple:
+    # The three terms of a triple; all annotated as str
+    s: str
+    p: str
+    o: str
+
+@dataclasses.dataclass
+class ConfigKey:
+    # A configuration entry identified by a type and a key
+    type: str
+    key: str
+
+@dataclasses.dataclass
+class ConfigValue:
+    # A type/key pair together with its value
+    type: str
+    key: str
+    value: str
+
+@dataclasses.dataclass
+class DocumentMetadata:
+    # Descriptive record for one document
+    id: str
+    time: datetime.datetime
+    kind: str
+    title: str
+    comments: str
+    # Triples attached to the document
+    metadata: List[Triple]
+    user: str
+    tags: List[str]
+
+@dataclasses.dataclass
+class ProcessingMetadata:
+    # Record of one processing request
+    id: str
+    # ID of the document this record refers to
+    document_id: str
+    time: datetime.datetime
+    flow: str
+    user: str
+    collection: str
+    tags: List[str]
diff --git a/trustgraph-base/trustgraph/base/__init__.py b/trustgraph-base/trustgraph/base/__init__.py
index 3a58d51e..2accbb21 100644
--- a/trustgraph-base/trustgraph/base/__init__.py
+++ b/trustgraph-base/trustgraph/base/__init__.py
@@ -1,8 +1,31 @@
-from . base_processor import BaseProcessor
+from . pubsub import PulsarClient
+from . async_processor import AsyncProcessor
from . consumer import Consumer
from . producer import Producer
-from . consumer_producer import ConsumerProducer
from . publisher import Publisher
from . subscriber import Subscriber
+from . metrics import ProcessorMetrics, ConsumerMetrics, ProducerMetrics
+from . flow_processor import FlowProcessor
+from . consumer_spec import ConsumerSpec
+from . setting_spec import SettingSpec
+from . producer_spec import ProducerSpec
+from . subscriber_spec import SubscriberSpec
+from . request_response_spec import RequestResponseSpec
+from . llm_service import LlmService, LlmResult
+from . embeddings_service import EmbeddingsService
+from . embeddings_client import EmbeddingsClientSpec
+from . text_completion_client import TextCompletionClientSpec
+from . prompt_client import PromptClientSpec
+from . triples_store_service import TriplesStoreService
+from . graph_embeddings_store_service import GraphEmbeddingsStoreService
+from . document_embeddings_store_service import DocumentEmbeddingsStoreService
+from . triples_query_service import TriplesQueryService
+from . graph_embeddings_query_service import GraphEmbeddingsQueryService
+from . document_embeddings_query_service import DocumentEmbeddingsQueryService
+from . graph_embeddings_client import GraphEmbeddingsClientSpec
+from . triples_client import TriplesClientSpec
+from . document_embeddings_client import DocumentEmbeddingsClientSpec
+from . agent_service import AgentService
+from . graph_rag_client import GraphRagClientSpec
diff --git a/trustgraph-base/trustgraph/base/agent_client.py b/trustgraph-base/trustgraph/base/agent_client.py
new file mode 100644
index 00000000..76e1adff
--- /dev/null
+++ b/trustgraph-base/trustgraph/base/agent_client.py
@@ -0,0 +1,39 @@
+
+from . request_response_spec import RequestResponse, RequestResponseSpec
+from .. schema import AgentRequest, AgentResponse
+from .. knowledge import Uri, Literal
+
+class AgentClient(RequestResponse):
+    """Request/response client that sends AgentRequest messages."""
+
+    async def request(self, recipient, question, plan=None, state=None,
+                      history=None, timeout=300):
+        """
+        Send an agent request and return the response object.
+
+        Builds an AgentRequest from question, plan, state and history
+        and passes it, with recipient and timeout, to the base class
+        request().  Raises RuntimeError carrying the error message when
+        the response has its error field set.
+        """
+
+        # A fresh list per call; a shared mutable default would leak
+        # state between calls
+        if history is None:
+            history = []
+
+        # This method overrides request(), so the transport call must go
+        # to the base class: self.request() here would call this method
+        # again with the wrong arguments
+        resp = await super().request(
+            AgentRequest(
+                question = question,
+                plan = plan,
+                state = state,
+                history = history,
+            ),
+            recipient=recipient,
+            timeout=timeout,
+        )
+
+        print(resp, flush=True)
+
+        if resp.error:
+            raise RuntimeError(resp.error.message)
+
+        return resp
+
+class GraphEmbeddingsClientSpec(RequestResponseSpec):
+    """
+    Request/response specification that wires AgentRequest and
+    AgentResponse schemas to the AgentClient implementation.
+    """
+
+    # NOTE(review): the class name looks like a copy/paste leftover from
+    # the graph embeddings client; it is kept so existing references still
+    # resolve.  The schemas and implementation now use the names this
+    # module actually imports and defines.
+    def __init__(
+            self, request_name, response_name,
+    ):
+        super(GraphEmbeddingsClientSpec, self).__init__(
+            request_name = request_name,
+            request_schema = AgentRequest,
+            response_name = response_name,
+            response_schema = AgentResponse,
+            impl = AgentClient,
+        )
+
+# Descriptive alias for the specification above
+AgentClientSpec = GraphEmbeddingsClientSpec
+
diff --git a/trustgraph-base/trustgraph/base/agent_service.py b/trustgraph-base/trustgraph/base/agent_service.py
new file mode 100644
index 00000000..0dbe728e
--- /dev/null
+++ b/trustgraph-base/trustgraph/base/agent_service.py
@@ -0,0 +1,100 @@
+
+"""
+Agent manager service completion base class
+"""
+
+import time
+from prometheus_client import Histogram
+
+from .. schema import AgentRequest, AgentResponse, Error
+from .. exceptions import TooManyRequests
+from .. base import FlowProcessor, ConsumerSpec, ProducerSpec
+
+default_ident = "agent-manager"
+
+class AgentService(FlowProcessor):
+    """
+    Base class for agent services.  Registers a "request" consumer and
+    "next" and "response" producers, and dispatches each incoming request
+    to self.agent_request, which is not defined here (presumably supplied
+    by subclasses).
+    """
+
+    def __init__(self, **params):
+
+        id = params.get("id")
+
+        super(AgentService, self).__init__(**params | { "id": id })
+
+        self.register_specification(
+            ConsumerSpec(
+                name = "request",
+                schema = AgentRequest,
+                handler = self.on_request
+            )
+        )
+
+        self.register_specification(
+            ProducerSpec(
+                name = "next",
+                schema = AgentRequest
+            )
+        )
+
+        self.register_specification(
+            ProducerSpec(
+                name = "response",
+                schema = AgentResponse
+            )
+        )
+
+    async def on_request(self, msg, consumer, flow):
+        """
+        Handle one request message.
+
+        Calls self.agent_request with the request value and two
+        callbacks that send on the "response" and "next" producers,
+        tagged with the sender's message ID.  TooManyRequests is
+        re-raised; any other exception is reported as an AgentResponse
+        with an "agent-error" error.
+        """
+
+        # Bound before the try block so the error path can always refer
+        # to it, even when reading the message itself fails
+        id = None
+
+        try:
+
+            request = msg.value()
+
+            # Sender-produced ID
+            id = msg.properties()["id"]
+
+            async def respond(resp):
+
+                await flow("response").send(
+                    resp,
+                    properties={"id": id}
+                )
+
+            async def next(resp):
+
+                await flow("next").send(
+                    resp,
+                    properties={"id": id}
+                )
+
+            await self.agent_request(
+                request = request, respond = respond, next = next,
+                flow = flow
+            )
+
+        except TooManyRequests:
+            raise
+
+        except Exception as e:
+
+            # Apart from rate limits, treat all exceptions as unrecoverable
+            print(f"on_request Exception: {e}")
+
+            print("Send error response...", flush=True)
+
+            await flow.producer["response"].send(
+                AgentResponse(
+                    error=Error(
+                        type = "agent-error",
+                        message = str(e),
+                    ),
+                    thought = None,
+                    observation = None,
+                    answer = None,
+                ),
+                # The ID is only attached when it was read successfully
+                properties={"id": id} if id is not None else {}
+            )
+
+    @staticmethod
+    def add_args(parser):
+        """Add command-line arguments; defers to FlowProcessor."""
+
+        FlowProcessor.add_args(parser)
+
diff --git a/trustgraph-base/trustgraph/base/async_processor.py b/trustgraph-base/trustgraph/base/async_processor.py
new file mode 100644
index 00000000..ba1d4e1a
--- /dev/null
+++ b/trustgraph-base/trustgraph/base/async_processor.py
@@ -0,0 +1,257 @@
+
+# Base class for processors. Implements:
+# - Pulsar client, subscribe and consume basic
+# - the async startup logic
+# - Initialising metrics
+
+import asyncio
+import argparse
+import _pulsar
+import time
+import uuid
+from prometheus_client import start_http_server, Info
+
+from .. schema import ConfigPush, config_push_queue
+from .. log_level import LogLevel
+from .. exceptions import TooManyRequests
+from . pubsub import PulsarClient
+from . producer import Producer
+from . consumer import Consumer
+from . metrics import ProcessorMetrics, ConsumerMetrics
+
+default_config_queue = config_push_queue
+
+# Async processor
+class AsyncProcessor:
+    """
+    Base class for processors.  Creates the Pulsar client, records the
+    start-up parameters as metrics, subscribes to the configuration push
+    queue and provides the command-line launch and retry loop.
+    """
+
+    def __init__(self, **params):
+
+        # Store the identity
+        self.id = params.get("id")
+
+        # Register a pulsar client
+        self.pulsar_client_object = PulsarClient(**params)
+
+        # Initialise metrics, records the parameters
+        ProcessorMetrics(processor = self.id).info({
+            k: str(params[k])
+            for k in params
+            if k != "id"
+        })
+
+        # The processor runs all activity in a taskgroup, it's mandatory
+        # that this is provided
+        self.taskgroup = params.get("taskgroup")
+        if self.taskgroup is None:
+            raise RuntimeError("Essential taskgroup missing")
+
+        # Get the configuration topic.  The --config-queue option is
+        # stored by argparse as "config_queue", so that key is honoured
+        # as well as the explicit "config_push_queue" parameter
+        self.config_push_queue = params.get(
+            "config_push_queue",
+            params.get("config_queue", default_config_queue),
+        )
+
+        # This records registered configuration handlers
+        self.config_handlers = []
+
+        # Create a random ID for this subscription to the configuration
+        # service
+        config_subscriber_id = str(uuid.uuid4())
+
+        config_consumer_metrics = ConsumerMetrics(
+            processor = self.id, flow = None, name = "config",
+        )
+
+        # Subscribe to config queue
+        self.config_sub_task = Consumer(
+
+            taskgroup = self.taskgroup,
+            client = self.pulsar_client,
+            subscriber = config_subscriber_id,
+            flow = None,
+
+            topic = self.config_push_queue,
+            schema = ConfigPush,
+
+            handler = self.on_config_change,
+
+            metrics = config_consumer_metrics,
+
+            # This causes new subscriptions to view the entire history of
+            # configuration
+            start_of_messages = True
+        )
+
+        self.running = True
+
+    # This is called to start dynamic behaviour.  An over-ride point for
+    # extra functionality
+    async def start(self):
+        await self.config_sub_task.start()
+
+    # This is called to stop all threads.  An over-ride point for extra
+    # functionality
+    def stop(self):
+        self.pulsar_client.close()
+        self.running = False
+
+    # Returns the pulsar host
+    @property
+    def pulsar_host(self): return self.pulsar_client_object.pulsar_host
+
+    # Returns the pulsar client
+    @property
+    def pulsar_client(self): return self.pulsar_client_object.client
+
+    # Register a new event handler for configuration change
+    def register_config_handler(self, handler):
+        self.config_handlers.append(handler)
+
+    # Called when a new configuration message push occurs
+    async def on_config_change(self, message, consumer, flow):
+
+        # Get configuration data and version number
+        config = message.value().config
+        version = message.value().version
+
+        # Invoke message handlers in registration order
+        print("Config change event", version, flush=True)
+        for ch in self.config_handlers:
+            await ch(config, version)
+
+    # This is the 'main' body of the handler.  It is a point to override
+    # if needed.  By default it only idles while self.running is set.
+    # Processors are implemented by adding consumer/producer functionality
+    # so maybe nothing is needed in the run() body
+    async def run(self):
+        while self.running:
+            await asyncio.sleep(2)
+
+    # Startup fabric.  This runs in 'async' mode, creates a taskgroup and
+    # runs the processor.
+    @classmethod
+    async def launch_async(cls, args):
+
+        try:
+
+            # Create a taskgroup.  This seems complicated, when an exception
+            # occurs, unhandled it looks like it cancels all threads in the
+            # taskgroup.  Needs the exception to be caught in the right
+            # place.
+            async with asyncio.TaskGroup() as tg:
+
+                # Create a processor instance, and include the taskgroup
+                # as a parameter.  A processor identity ident is used as
+                # - subscriber name
+                # - an identifier for flow configuration
+                p = cls(**args | { "taskgroup": tg })
+
+                # Start the processor
+                await p.start()
+
+                # Run the processor
+                tg.create_task(p.run())
+
+                # The taskgroup causes everything to wait until
+                # all threads have stopped
+
+        # This is here to output a debug message, shouldn't be needed.
+        except Exception:
+            print("Exception, closing taskgroup", flush=True)
+            raise
+
+    # Startup fabric.  launch calls launch_async in async mode.
+    @classmethod
+    def launch(cls, ident, doc):
+
+        # Start assembling CLI arguments
+        parser = argparse.ArgumentParser(
+            prog=ident,
+            description=doc
+        )
+
+        parser.add_argument(
+            '--id',
+            default=ident,
+            help=f'Configuration identity (default: {ident})',
+        )
+
+        # Invoke the class-specific add_args, which manages adding all the
+        # command-line arguments
+        cls.add_args(parser)
+
+        # Parse arguments
+        args = parser.parse_args()
+        args = vars(args)
+
+        # Debug
+        print(args, flush=True)
+
+        # Start the Prometheus metrics service if needed
+        if args["metrics"]:
+            start_http_server(args["metrics_port"])
+
+        # Loop forever, exception handler
+        while True:
+
+            print("Starting...", flush=True)
+
+            try:
+
+                # Launch the processor in an asyncio handler
+                asyncio.run(cls.launch_async(
+                    args
+                ))
+
+            except KeyboardInterrupt:
+                print("Keyboard interrupt.", flush=True)
+                return
+
+            except _pulsar.Interrupted:
+                print("Pulsar Interrupted.", flush=True)
+                return
+
+            # Exceptions from a taskgroup come in as an exception group
+            except ExceptionGroup as e:
+
+                print("Exception group:", flush=True)
+
+                for se in e.exceptions:
+                    print("  Type:", type(se), flush=True)
+                    print(f"  Exception: {se}", flush=True)
+
+            except Exception as e:
+                print("Type:", type(e), flush=True)
+                print("Exception:", e, flush=True)
+
+            # Retry occurs here
+            print("Will retry...", flush=True)
+            time.sleep(4)
+            print("Retrying...", flush=True)
+
+    # The command-line arguments are built using a stack of add_args
+    # invocations
+    @staticmethod
+    def add_args(parser):
+
+        PulsarClient.add_args(parser)
+
+        parser.add_argument(
+            '--config-queue',
+            default=default_config_queue,
+            help=f'Config push queue (default: {default_config_queue})',
+        )
+
+        parser.add_argument(
+            '--metrics',
+            action=argparse.BooleanOptionalAction,
+            default=True,
+            help='Metrics enabled (default: true)',
+        )
+
+        parser.add_argument(
+            '-P', '--metrics-port',
+            type=int,
+            default=8000,
+            help='Metrics port (default: 8000)',
+        )
diff --git a/trustgraph-base/trustgraph/base/base_processor.py b/trustgraph-base/trustgraph/base/base_processor.py
deleted file mode 100644
index a8374538..00000000
--- a/trustgraph-base/trustgraph/base/base_processor.py
+++ /dev/null
@@ -1,156 +0,0 @@
-
-import asyncio
-import os
-import argparse
-import pulsar
-import _pulsar
-import time
-from prometheus_client import start_http_server, Info
-
-from .. log_level import LogLevel
-
-class BaseProcessor:
-
- default_pulsar_host = os.getenv("PULSAR_HOST", 'pulsar://pulsar:6650')
- default_pulsar_api_key = os.getenv("PULSAR_API_KEY", None)
-
- def __init__(self, **params):
-
- self.client = None
-
- if not hasattr(__class__, "params_metric"):
- __class__.params_metric = Info(
- 'params', 'Parameters configuration'
- )
-
- # FIXME: Maybe outputs information it should not
- __class__.params_metric.info({
- k: str(params[k])
- for k in params
- })
-
- pulsar_host = params.get("pulsar_host", self.default_pulsar_host)
- pulsar_listener = params.get("pulsar_listener", None)
- pulsar_api_key = params.get("pulsar_api_key", None)
- log_level = params.get("log_level", LogLevel.INFO)
-
- self.pulsar_host = pulsar_host
- self.pulsar_api_key = pulsar_api_key
-
- if pulsar_api_key:
- auth = pulsar.AuthenticationToken(pulsar_api_key)
- self.client = pulsar.Client(
- pulsar_host,
- authentication=auth,
- logger=pulsar.ConsoleLogger(log_level.to_pulsar())
- )
- else:
- self.client = pulsar.Client(
- pulsar_host,
- listener_name=pulsar_listener,
- logger=pulsar.ConsoleLogger(log_level.to_pulsar())
- )
-
- self.pulsar_listener = pulsar_listener
-
- def __del__(self):
-
- if hasattr(self, "client"):
- if self.client:
- self.client.close()
-
- @staticmethod
- def add_args(parser):
-
- parser.add_argument(
- '-p', '--pulsar-host',
- default=__class__.default_pulsar_host,
- help=f'Pulsar host (default: {__class__.default_pulsar_host})',
- )
-
- parser.add_argument(
- '--pulsar-api-key',
- default=__class__.default_pulsar_api_key,
- help=f'Pulsar API key',
- )
-
- parser.add_argument(
- '--pulsar-listener',
- help=f'Pulsar listener (default: none)',
- )
-
- parser.add_argument(
- '-l', '--log-level',
- type=LogLevel,
- default=LogLevel.INFO,
- choices=list(LogLevel),
- help=f'Output queue (default: info)'
- )
-
- parser.add_argument(
- '--metrics',
- action=argparse.BooleanOptionalAction,
- default=True,
- help=f'Metrics enabled (default: true)',
- )
-
- parser.add_argument(
- '-P', '--metrics-port',
- type=int,
- default=8000,
- help=f'Pulsar host (default: 8000)',
- )
-
- async def start(self):
- pass
-
- async def run(self):
- raise RuntimeError("Something should have implemented the run method")
-
- @classmethod
- async def launch_async(cls, args):
- p = cls(**args)
- await p.start()
- await p.run()
-
- @classmethod
- def launch(cls, prog, doc):
-
- parser = argparse.ArgumentParser(
- prog=prog,
- description=doc
- )
-
- cls.add_args(parser)
-
- args = parser.parse_args()
- args = vars(args)
-
- print(args)
-
- if args["metrics"]:
- start_http_server(args["metrics_port"])
-
- while True:
-
- try:
-
- asyncio.run(cls.launch_async(args))
-
- except KeyboardInterrupt:
- print("Keyboard interrupt.")
- return
-
- except _pulsar.Interrupted:
- print("Pulsar Interrupted.")
- return
-
- except Exception as e:
-
- print(type(e))
-
- print("Exception:", e, flush=True)
- print("Will retry...", flush=True)
-
- time.sleep(4)
-
diff --git a/trustgraph-base/trustgraph/base/consumer.py b/trustgraph-base/trustgraph/base/consumer.py
index 175f1fd7..162e10eb 100644
--- a/trustgraph-base/trustgraph/base/consumer.py
+++ b/trustgraph-base/trustgraph/base/consumer.py
@@ -1,93 +1,143 @@
-import asyncio
from pulsar.schema import JsonSchema
import pulsar
-from prometheus_client import Histogram, Info, Counter, Enum
+import _pulsar
+import asyncio
import time
-from . base_processor import BaseProcessor
from .. exceptions import TooManyRequests
-default_rate_limit_retry = 10
-default_rate_limit_timeout = 7200
+class Consumer:
-class Consumer(BaseProcessor):
+ def __init__(
+ self, taskgroup, flow, client, topic, subscriber, schema,
+ handler,
+ metrics = None,
+ start_of_messages=False,
+ rate_limit_retry_time = 10, rate_limit_timeout = 7200,
+ reconnect_time = 5,
+ ):
- def __init__(self, **params):
+ self.taskgroup = taskgroup
+ self.flow = flow
+ self.client = client
+ self.topic = topic
+ self.subscriber = subscriber
+ self.schema = schema
+ self.handler = handler
- if not hasattr(__class__, "state_metric"):
- __class__.state_metric = Enum(
- 'processor_state', 'Processor state',
- states=['starting', 'running', 'stopped']
- )
- __class__.state_metric.state('starting')
+ self.rate_limit_retry_time = rate_limit_retry_time
+ self.rate_limit_timeout = rate_limit_timeout
- __class__.state_metric.state('starting')
+ self.reconnect_time = 5
- super(Consumer, self).__init__(**params)
+ self.start_of_messages = start_of_messages
- self.input_queue = params.get("input_queue")
- self.subscriber = params.get("subscriber")
- self.input_schema = params.get("input_schema")
+ self.running = True
+ self.task = None
- self.rate_limit_retry = params.get(
- "rate_limit_retry", default_rate_limit_retry
- )
- self.rate_limit_timeout = params.get(
- "rate_limit_timeout", default_rate_limit_timeout
- )
+ self.metrics = metrics
- if self.input_schema == None:
- raise RuntimeError("input_schema must be specified")
+ self.consumer = None
- if not hasattr(__class__, "request_metric"):
- __class__.request_metric = Histogram(
- 'request_latency', 'Request latency (seconds)'
- )
+ def __del__(self):
+ self.running = False
- if not hasattr(__class__, "pubsub_metric"):
- __class__.pubsub_metric = Info(
- 'pubsub', 'Pub/sub configuration'
- )
+ if hasattr(self, "consumer"):
+ if self.consumer:
+ self.consumer.unsubscribe()
+ self.consumer.close()
+ self.consumer = None
- if not hasattr(__class__, "processing_metric"):
- __class__.processing_metric = Counter(
- 'processing_count', 'Processing count', ["status"]
- )
+ async def stop(self):
- if not hasattr(__class__, "rate_limit_metric"):
- __class__.rate_limit_metric = Counter(
- 'rate_limit_count', 'Rate limit event count',
- )
+ self.running = False
+ await self.task
- __class__.pubsub_metric.info({
- "input_queue": self.input_queue,
- "subscriber": self.subscriber,
- "input_schema": self.input_schema.__name__,
- "rate_limit_retry": str(self.rate_limit_retry),
- "rate_limit_timeout": str(self.rate_limit_timeout),
- })
+ async def start(self):
- self.consumer = self.client.subscribe(
- self.input_queue, self.subscriber,
- consumer_type=pulsar.ConsumerType.Shared,
- schema=JsonSchema(self.input_schema),
- )
+ self.running = True
- print("Initialised consumer.", flush=True)
+ # Puts it in the stopped state, the run thread should set running
+ if self.metrics:
+ self.metrics.state("stopped")
+
+ self.task = self.taskgroup.create_task(self.run())
async def run(self):
- __class__.state_metric.state('running')
+ while self.running:
- while True:
+ if self.metrics:
+ self.metrics.state("stopped")
- msg = self.consumer.receive()
+ try:
+
+ print(self.topic, "subscribing...", flush=True)
+
+ if self.start_of_messages:
+ pos = pulsar.InitialPosition.Earliest
+ else:
+ pos = pulsar.InitialPosition.Latest
+
+ self.consumer = await asyncio.to_thread(
+ self.client.subscribe,
+ topic = self.topic,
+ subscription_name = self.subscriber,
+ schema = JsonSchema(self.schema),
+ initial_position = pos,
+ consumer_type = pulsar.ConsumerType.Shared,
+ )
+
+ except Exception as e:
+
+ print("consumer subs Exception:", e, flush=True)
+ await asyncio.sleep(self.reconnect_time)
+ continue
+
+ print(self.topic, "subscribed", flush=True)
+
+ if self.metrics:
+ self.metrics.state("running")
+
+ try:
+
+ await self.consume()
+
+ if self.metrics:
+ self.metrics.state("stopped")
+
+ except Exception as e:
+
+ print("consumer loop exception:", e, flush=True)
+ self.consumer.unsubscribe()
+ self.consumer.close()
+ self.consumer = None
+ await asyncio.sleep(self.reconnect_time)
+ continue
+
+ if self.consumer:
+ self.consumer.unsubscribe()
+ self.consumer.close()
+
+ async def consume(self):
+
+ while self.running:
+
+ try:
+ msg = await asyncio.to_thread(
+ self.consumer.receive,
+ timeout_millis=2000
+ )
+ except _pulsar.Timeout:
+ continue
+ except Exception as e:
+ raise e
expiry = time.time() + self.rate_limit_timeout
# This loop is for retry on rate-limit / resource limits
- while True:
+ while self.running:
if time.time() > expiry:
@@ -97,20 +147,31 @@ class Consumer(BaseProcessor):
# be retried
self.consumer.negative_acknowledge(msg)
- __class__.processing_metric.labels(status="error").inc()
+ if self.metrics:
+ self.metrics.process("error")
# Break out of retry loop, processes next message
break
try:
- with __class__.request_metric.time():
- await self.handle(msg)
+ print("Handle...", flush=True)
+
+ if self.metrics:
+
+ with self.metrics.record_time():
+ await self.handler(msg, self, self.flow)
+
+ else:
+ await self.handler(msg, self, self.flow)
+
+ print("Handled.", flush=True)
# Acknowledge successful processing of the message
self.consumer.acknowledge(msg)
- __class__.processing_metric.labels(status="success").inc()
+ if self.metrics:
+ self.metrics.process("success")
# Break out of retry loop
break
@@ -119,55 +180,25 @@ class Consumer(BaseProcessor):
print("TooManyRequests: will retry...", flush=True)
- __class__.rate_limit_metric.inc()
+ if self.metrics:
+ self.metrics.rate_limit()
# Sleep
- time.sleep(self.rate_limit_retry)
+ await asyncio.sleep(self.rate_limit_retry_time)
# Contine from retry loop, just causes a reprocessing
continue
-
+
except Exception as e:
- print("Exception:", e, flush=True)
+ print("consume exception:", e, flush=True)
# Message failed to be processed, this causes it to
# be retried
self.consumer.negative_acknowledge(msg)
- __class__.processing_metric.labels(status="error").inc()
+ if self.metrics:
+ self.metrics.process("error")
# Break out of retry loop, processes next message
break
-
- @staticmethod
- def add_args(parser, default_input_queue, default_subscriber):
-
- BaseProcessor.add_args(parser)
-
- parser.add_argument(
- '-i', '--input-queue',
- default=default_input_queue,
- help=f'Input queue (default: {default_input_queue})'
- )
-
- parser.add_argument(
- '-s', '--subscriber',
- default=default_subscriber,
- help=f'Queue subscriber name (default: {default_subscriber})'
- )
-
- parser.add_argument(
- '--rate-limit-retry',
- type=int,
- default=default_rate_limit_retry,
- help=f'Rate limit retry (default: {default_rate_limit_retry})'
- )
-
- parser.add_argument(
- '--rate-limit-timeout',
- type=int,
- default=default_rate_limit_timeout,
- help=f'Rate limit timeout (default: {default_rate_limit_timeout})'
- )
-
diff --git a/trustgraph-base/trustgraph/base/consumer_producer.py b/trustgraph-base/trustgraph/base/consumer_producer.py
deleted file mode 100644
index 1006f9b5..00000000
--- a/trustgraph-base/trustgraph/base/consumer_producer.py
+++ /dev/null
@@ -1,62 +0,0 @@
-
-from pulsar.schema import JsonSchema
-import pulsar
-from prometheus_client import Histogram, Info, Counter, Enum
-import time
-
-from . consumer import Consumer
-from .. exceptions import TooManyRequests
-
-class ConsumerProducer(Consumer):
-
- def __init__(self, **params):
-
- super(ConsumerProducer, self).__init__(**params)
-
- self.output_queue = params.get("output_queue")
- self.output_schema = params.get("output_schema")
-
- if not hasattr(__class__, "output_metric"):
- __class__.output_metric = Counter(
- 'output_count', 'Output items created'
- )
-
- __class__.pubsub_metric.info({
- "input_queue": self.input_queue,
- "output_queue": self.output_queue,
- "subscriber": self.subscriber,
- "input_schema": self.input_schema.__name__,
- "output_schema": self.output_schema.__name__,
- "rate_limit_retry": str(self.rate_limit_retry),
- "rate_limit_timeout": str(self.rate_limit_timeout),
- })
-
- if self.output_schema == None:
- raise RuntimeError("output_schema must be specified")
-
- self.producer = self.client.create_producer(
- topic=self.output_queue,
- schema=JsonSchema(self.output_schema),
- chunking_enabled=True,
- )
-
- print("Initialised consumer/producer.")
-
- async def send(self, msg, properties={}):
- self.producer.send(msg, properties)
- __class__.output_metric.inc()
-
- @staticmethod
- def add_args(
- parser, default_input_queue, default_subscriber,
- default_output_queue,
- ):
-
- Consumer.add_args(parser, default_input_queue, default_subscriber)
-
- parser.add_argument(
- '-o', '--output-queue',
- default=default_output_queue,
- help=f'Output queue (default: {default_output_queue})'
- )
-
diff --git a/trustgraph-base/trustgraph/base/consumer_spec.py b/trustgraph-base/trustgraph/base/consumer_spec.py
new file mode 100644
index 00000000..93665476
--- /dev/null
+++ b/trustgraph-base/trustgraph/base/consumer_spec.py
@@ -0,0 +1,36 @@
+
+from . metrics import ConsumerMetrics
+from . consumer import Consumer
+from . spec import Spec
+
+class ConsumerSpec(Spec):
+    """Specification for a named flow consumer: schema plus message handler."""
+
+    def __init__(self, name, schema, handler):
+        self.name, self.schema, self.handler = name, schema, handler
+
+    def add(self, flow, processor, definition):
+        """Create this spec's Consumer and register it as flow.consumer[name]."""
+        key = self.name
+        stats = ConsumerMetrics(
+            processor = flow.id, flow = flow.name, name = key,
+        )
+
+        handle = Consumer(
+            taskgroup = processor.taskgroup,
+            flow = flow,
+            client = processor.pulsar_client,
+            topic = definition[key],
+            subscriber = "--".join((processor.id, flow.name, key)),
+            schema = self.schema,
+            handler = self.handler,
+            metrics = stats,
+        )
+
+        # Attach identity and the owning flow to the consumer object itself
+        handle.id = flow.id
+        handle.name = key
+        handle.flow = flow
+
+        flow.consumer[key] = handle
+
diff --git a/trustgraph-base/trustgraph/base/document_embeddings_client.py b/trustgraph-base/trustgraph/base/document_embeddings_client.py
new file mode 100644
index 00000000..86370c52
--- /dev/null
+++ b/trustgraph-base/trustgraph/base/document_embeddings_client.py
@@ -0,0 +1,38 @@
+
+from . request_response_spec import RequestResponse, RequestResponseSpec
+from .. schema import DocumentEmbeddingsRequest, DocumentEmbeddingsResponse
+from .. knowledge import Uri, Literal
+
+class DocumentEmbeddingsClient(RequestResponse):
+    """Request/response client for the document embeddings query service."""
+
+    async def query(self, vectors, limit=20, user="trustgraph",
+                    collection="default", timeout=30):
+        """Send a query for `vectors`; return the response's documents.
+
+        Raises RuntimeError carrying the service's message if the response
+        has its error field set.
+        """
+        query = DocumentEmbeddingsRequest(
+            vectors = vectors, limit = limit,
+            user = user, collection = collection,
+        )
+        reply = await self.request(query, timeout=timeout)
+        print(reply, flush=True)
+
+        failure = reply.error
+        if failure: raise RuntimeError(failure.message)
+        return reply.documents
+
+class DocumentEmbeddingsClientSpec(RequestResponseSpec):
+    """Spec binding the document embeddings schemas to their client class."""
+    def __init__(self, request_name, response_name):
+        wiring = dict(
+            request_name = request_name,
+            request_schema = DocumentEmbeddingsRequest,
+            response_name = response_name,
+            response_schema = DocumentEmbeddingsResponse,
+            impl = DocumentEmbeddingsClient,
+        )
+        super().__init__(**wiring)
+
diff --git a/trustgraph-base/trustgraph/base/document_embeddings_query_service.py b/trustgraph-base/trustgraph/base/document_embeddings_query_service.py
new file mode 100644
index 00000000..0dee7001
--- /dev/null
+++ b/trustgraph-base/trustgraph/base/document_embeddings_query_service.py
@@ -0,0 +1,84 @@
+
+"""
+Document embeddings query service. Input is vectors. Output is list of
+embeddings.
+"""
+
+from .. schema import DocumentEmbeddingsRequest, DocumentEmbeddingsResponse
+from .. schema import Error, Value
+
+from . flow_processor import FlowProcessor
+from . consumer_spec import ConsumerSpec
+from . producer_spec import ProducerSpec
+
+default_ident = "ge-query"
+
+class DocumentEmbeddingsQueryService(FlowProcessor):
+    """Base query processor; subclasses provide query_document_embeddings()."""
+
+    def __init__(self, **params):
+
+        id = params.get("id")
+
+        super(DocumentEmbeddingsQueryService, self).__init__(
+            **params | { "id": id }
+        )
+
+        self.register_specification(
+            ConsumerSpec(
+                name = "request",
+                schema = DocumentEmbeddingsRequest,
+                handler = self.on_message
+            )
+        )
+
+        self.register_specification(
+            ProducerSpec(
+                name = "response",
+                schema = DocumentEmbeddingsResponse,
+            )
+        )
+
+    async def on_message(self, msg, consumer, flow):
+        """Answer one query; failures are reported in the response's error."""
+
+        # Sender-produced ID. Read before the try block so the error path
+        # below can always echo it back to the requester.
+        id = msg.properties()["id"]
+
+        try:
+
+            request = msg.value()
+
+            print(f"Handling input {id}...", flush=True)
+            docs = await self.query_document_embeddings(request)
+
+            print("Send response...", flush=True)
+            r = DocumentEmbeddingsResponse(documents=docs, error=None)
+            await flow("response").send(r, properties={"id": id})
+            print("Done.", flush=True)
+
+        except Exception as e:
+
+            print(f"Exception: {e}")
+            print("Send error response...", flush=True)
+
+            # The payload field is `documents`, as on the success path
+            r = DocumentEmbeddingsResponse(
+                error=Error(
+                    type = "document-embeddings-query-error",
+                    message = str(e),
+                ),
+                documents=None,
+            )
+            await flow("response").send(r, properties={"id": id})
+
+    @staticmethod
+    def add_args(parser):
+
+        FlowProcessor.add_args(parser)
+
+def run():
+    # No `Processor` exists in this module; launch the class defined here
+    DocumentEmbeddingsQueryService.launch(default_ident, __doc__)
+
diff --git a/trustgraph-base/trustgraph/base/document_embeddings_store_service.py b/trustgraph-base/trustgraph/base/document_embeddings_store_service.py
new file mode 100644
index 00000000..fbf58869
--- /dev/null
+++ b/trustgraph-base/trustgraph/base/document_embeddings_store_service.py
@@ -0,0 +1,50 @@
+
+"""
+Document embeddings store base class
+"""
+
+from .. schema import DocumentEmbeddings
+from .. base import FlowProcessor, ConsumerSpec
+from .. exceptions import TooManyRequests
+
+default_ident = "document-embeddings-write"
+
+class DocumentEmbeddingsStoreService(FlowProcessor):
+    """Base store processor; subclasses provide store_document_embeddings()."""
+
+    def __init__(self, **params):
+        """Initialise the processor and declare its single "input" consumer."""
+
+        # Guarantee an "id" key is passed up (None when the caller gave none)
+        settings = params | { "id": params.get("id") }
+        super(DocumentEmbeddingsStoreService, self).__init__(**settings)
+
+        input_spec = ConsumerSpec(
+            name = "input",
+            schema = DocumentEmbeddings,
+            handler = self.on_message,
+        )
+        self.register_specification(input_spec)
+
+    async def on_message(self, msg, consumer, flow):
+        """Store the embeddings carried by `msg`.
+
+        Every exception is passed on to the caller. TooManyRequests passes
+        through silently; anything else is printed first.
+        """
+
+        try:
+
+            request = msg.value()
+            await self.store_document_embeddings(request)
+
+        except Exception as e:
+            if not isinstance(e, TooManyRequests):
+                print(f"Exception: {e}")
+            raise e
+
+    @staticmethod
+    def add_args(parser):
+        """Register command-line arguments via FlowProcessor.add_args."""
+        FlowProcessor.add_args(parser)
+
diff --git a/trustgraph-base/trustgraph/base/embeddings_client.py b/trustgraph-base/trustgraph/base/embeddings_client.py
new file mode 100644
index 00000000..ceb08eb2
--- /dev/null
+++ b/trustgraph-base/trustgraph/base/embeddings_client.py
@@ -0,0 +1,31 @@
+
+from . request_response_spec import RequestResponse, RequestResponseSpec
+from .. schema import EmbeddingsRequest, EmbeddingsResponse
+
+class EmbeddingsClient(RequestResponse):
+    """Request/response client for the embeddings service."""
+
+    async def embed(self, text, timeout=30):
+        """Return the vectors for `text`; RuntimeError on a service error."""
+
+        reply = await self.request(
+            EmbeddingsRequest(text = text), timeout=timeout
+        )
+
+        failure = reply.error
+        if failure:
+            raise RuntimeError(failure.message)
+        return reply.vectors
+
+class EmbeddingsClientSpec(RequestResponseSpec):
+    """Spec binding the embeddings schemas to EmbeddingsClient."""
+    def __init__(self, request_name, response_name):
+        wiring = dict(
+            request_name = request_name,
+            request_schema = EmbeddingsRequest,
+            response_name = response_name,
+            response_schema = EmbeddingsResponse,
+            impl = EmbeddingsClient,
+        )
+        super().__init__(**wiring)
+
diff --git a/trustgraph-base/trustgraph/base/embeddings_service.py b/trustgraph-base/trustgraph/base/embeddings_service.py
new file mode 100644
index 00000000..c6befdb7
--- /dev/null
+++ b/trustgraph-base/trustgraph/base/embeddings_service.py
@@ -0,0 +1,90 @@
+
+"""
+Embeddings resolution base class
+"""
+
+import time
+from prometheus_client import Histogram
+
+from .. schema import EmbeddingsRequest, EmbeddingsResponse, Error
+from .. exceptions import TooManyRequests
+from .. base import FlowProcessor, ConsumerSpec, ProducerSpec
+
+default_ident = "embeddings"
+
+class EmbeddingsService(FlowProcessor):
+    """Base embeddings processor; subclasses provide on_embeddings(text)."""
+
+    def __init__(self, **params):
+
+        id = params.get("id")
+
+        super(EmbeddingsService, self).__init__(**params | { "id": id })
+
+        self.register_specification(
+            ConsumerSpec(
+                name = "request",
+                schema = EmbeddingsRequest,
+                handler = self.on_request
+            )
+        )
+
+        self.register_specification(
+            ProducerSpec(
+                name = "response",
+                schema = EmbeddingsResponse
+            )
+        )
+
+    async def on_request(self, msg, consumer, flow):
+        """Embed request.text; rate limits re-raise, other errors are sent."""
+
+        # Sender-produced ID. Read before the try block so the error path
+        # below can always echo it back to the requester.
+        id = msg.properties()["id"]
+
+        try:
+
+            request = msg.value()
+
+            print("Handling request", id, "...", flush=True)
+
+            vectors = await self.on_embeddings(request.text)
+
+            await flow("response").send(
+                EmbeddingsResponse(
+                    error = None,
+                    vectors = vectors,
+                ),
+                properties={"id": id}
+            )
+
+            print("Handled.", flush=True)
+
+        except TooManyRequests as e:
+            raise e
+
+        except Exception as e:
+
+            # Apart from rate limits, treat all exceptions as unrecoverable
+            print(f"Exception: {e}", flush=True)
+            print("Send error response...", flush=True)
+
+            await flow("response").send(
+                EmbeddingsResponse(
+                    error=Error(
+                        type = "embeddings-error",
+                        message = str(e),
+                    ),
+                    vectors=None,
+                ),
+                properties={"id": id}
+            )
+
+    @staticmethod
+    def add_args(parser):
+
+        FlowProcessor.add_args(parser)
+
+
+
diff --git a/trustgraph-base/trustgraph/base/flow.py b/trustgraph-base/trustgraph/base/flow.py
new file mode 100644
index 00000000..9cda34a0
--- /dev/null
+++ b/trustgraph-base/trustgraph/base/flow.py
@@ -0,0 +1,32 @@
+
+import asyncio
+
+class Flow:
+    """Runtime state of one flow: its producers, consumers and settings."""
+
+    def __init__(self, id, flow, processor, defn):
+        """Record identity, then hand this flow to each processor spec."""
+        self.id, self.name = id, flow
+
+        # Registries keyed by name; each spec.add() receives this flow
+        self.producer, self.consumer, self.setting = {}, {}, {}
+
+        for entry in processor.specifications:
+            entry.add(self, processor, defn)
+
+    async def start(self):
+        """Start each registered consumer, one after another."""
+        for worker in self.consumer.values(): await worker.start()
+
+    async def stop(self):
+        """Stop each registered consumer, one after another."""
+        for worker in self.consumer.values(): await worker.stop()
+
+    def __call__(self, key):
+        """Look `key` up as producer, then consumer, then setting value."""
+        for registry in (self.producer, self.consumer):
+            if key in registry:
+                return registry[key]
+        if key in self.setting:
+            return self.setting[key].value
+        return None
diff --git a/trustgraph-base/trustgraph/base/flow_processor.py b/trustgraph-base/trustgraph/base/flow_processor.py
new file mode 100644
index 00000000..fdeb5950
--- /dev/null
+++ b/trustgraph-base/trustgraph/base/flow_processor.py
@@ -0,0 +1,116 @@
+
+# Base class for processor with management of flows in & out which are managed
+# by configuration. This is probably all processor types, except for the
+# configuration service which can't manage itself.
+
+import json
+
+from pulsar.schema import JsonSchema
+
+from .. schema import Error
+from .. schema import config_request_queue, config_response_queue
+from .. schema import config_push_queue
+from .. log_level import LogLevel
+from . async_processor import AsyncProcessor
+from . flow import Flow
+
+# Parent class for configurable processors, configured with flows by
+# the config service
+class FlowProcessor(AsyncProcessor):
+
+ def __init__(self, **params):
+
+ # Initialise base class
+ super(FlowProcessor, self).__init__(**params)
+
+ # Register configuration handler
+ self.register_config_handler(self.on_configure_flows)
+
+ # Initialise flow information state
+ self.flows = {}
+
+ # These can be overridden by a derived class:
+
+ # Array of specifications: ConsumerSpec, ProducerSpec, SettingSpec
+ self.specifications = []
+
+ print("Service initialised.")
+
+ # Register a specification; each Flow applies all registered specifications
+ def register_specification(self, spec):
+ self.specifications.append(spec)
+
+ # Start processing for a new flow
+ async def start_flow(self, flow, defn):
+ self.flows[flow] = Flow(self.id, flow, self, defn)
+ await self.flows[flow].start()
+ print("Started flow: ", flow)
+
+ # Stop processing for a flow that is currently running
+ async def stop_flow(self, flow):
+ if flow in self.flows:
+ await self.flows[flow].stop()
+ del self.flows[flow]
+ print("Stopped flow: ", flow, flush=True)
+
+ # Event handler - called for a configuration change
+ async def on_configure_flows(self, config, version):
+
+ print("Got config version", version, flush=True)
+
+ # Skip over invalid data
+ if "flows-active" not in config: return
+
+ # Check there's configuration information for me
+ if self.id in config["flows-active"]:
+
+ # Get my flow config
+ flow_config = json.loads(config["flows-active"][self.id])
+
+ else:
+
+ print("No configuration settings for me.", flush=True)
+ flow_config = {}
+
+ # Get list of flows which should be running and are currently
+ # running
+ wanted_flows = flow_config.keys()
+ # This takes a copy, needed because dict gets modified by stop_flow
+ current_flows = list(self.flows.keys())
+
+ # Start all the flows which aren't currently running
+ for flow in wanted_flows:
+ if flow not in current_flows:
+ await self.start_flow(flow, flow_config[flow])
+
+ # Stop all the unwanted flows which are due to be stopped
+ for flow in current_flows:
+ if flow not in wanted_flows:
+ await self.stop_flow(flow)
+
+ print("Handled config update")
+
+ # Start threads, just call parent
+ async def start(self):
+ await super(FlowProcessor, self).start()
+
+ @staticmethod
+ def add_args(parser):
+
+ AsyncProcessor.add_args(parser)
+
+ # parser.add_argument(
+ # '--rate-limit-retry',
+ # type=int,
+ # default=default_rate_limit_retry,
+ # help=f'Rate limit retry (default: {default_rate_limit_retry})'
+ # )
+
+ # parser.add_argument(
+ # '--rate-limit-timeout',
+ # type=int,
+ # default=default_rate_limit_timeout,
+ # help=f'Rate limit timeout (default: {default_rate_limit_timeout})'
+ # )
+
+
diff --git a/trustgraph-base/trustgraph/base/graph_embeddings_client.py b/trustgraph-base/trustgraph/base/graph_embeddings_client.py
new file mode 100644
index 00000000..e89364f2
--- /dev/null
+++ b/trustgraph-base/trustgraph/base/graph_embeddings_client.py
@@ -0,0 +1,45 @@
+
+from . request_response_spec import RequestResponse, RequestResponseSpec
+from .. schema import GraphEmbeddingsRequest, GraphEmbeddingsResponse
+from .. knowledge import Uri, Literal
+
+def to_value(x):
+    # URI-flagged values become Uri, everything else becomes Literal
+    return (Uri if x.is_uri else Literal)(x.value)
+
+class GraphEmbeddingsClient(RequestResponse):
+    """Request/response client for the graph embeddings query service."""
+
+    async def query(self, vectors, limit=20, user="trustgraph",
+                    collection="default", timeout=30):
+        """Send a query for `vectors`; return the matching entities.
+
+        Each entity in the response is converted with to_value().
+        Raises RuntimeError carrying the service's message if the
+        response has its error field set.
+        """
+        query = GraphEmbeddingsRequest(
+            vectors = vectors, limit = limit,
+            user = user, collection = collection,
+        )
+        reply = await self.request(query, timeout=timeout)
+        print(reply, flush=True)
+
+        failure = reply.error
+        if failure:
+            raise RuntimeError(failure.message)
+
+        return list(map(to_value, reply.entities))
+
+class GraphEmbeddingsClientSpec(RequestResponseSpec):
+    """Spec binding the graph embeddings schemas to GraphEmbeddingsClient."""
+    def __init__(self, request_name, response_name):
+        wiring = dict(
+            request_name = request_name,
+            request_schema = GraphEmbeddingsRequest,
+            response_name = response_name,
+            response_schema = GraphEmbeddingsResponse,
+            impl = GraphEmbeddingsClient,
+        )
+        super().__init__(**wiring)
+
diff --git a/trustgraph-base/trustgraph/base/graph_embeddings_query_service.py b/trustgraph-base/trustgraph/base/graph_embeddings_query_service.py
new file mode 100644
index 00000000..fb2e8dc5
--- /dev/null
+++ b/trustgraph-base/trustgraph/base/graph_embeddings_query_service.py
@@ -0,0 +1,84 @@
+
+"""
+Graph embeddings query service. Input is vectors. Output is list of
+embeddings.
+"""
+
+from .. schema import GraphEmbeddingsRequest, GraphEmbeddingsResponse
+from .. schema import Error, Value
+
+from . flow_processor import FlowProcessor
+from . consumer_spec import ConsumerSpec
+from . producer_spec import ProducerSpec
+
+default_ident = "ge-query"
+
+class GraphEmbeddingsQueryService(FlowProcessor):
+    """Base query processor; subclasses provide query_graph_embeddings()."""
+
+    def __init__(self, **params):
+
+        id = params.get("id")
+
+        super(GraphEmbeddingsQueryService, self).__init__(
+            **params | { "id": id }
+        )
+
+        self.register_specification(
+            ConsumerSpec(
+                name = "request",
+                schema = GraphEmbeddingsRequest,
+                handler = self.on_message
+            )
+        )
+
+        self.register_specification(
+            ProducerSpec(
+                name = "response",
+                schema = GraphEmbeddingsResponse,
+            )
+        )
+
+    async def on_message(self, msg, consumer, flow):
+        """Answer one query; failures are reported in the response's error."""
+
+        # Sender-produced ID. Read before the try block so the error path
+        # below can always echo it back to the requester.
+        id = msg.properties()["id"]
+
+        try:
+
+            request = msg.value()
+
+            print(f"Handling input {id}...", flush=True)
+            entities = await self.query_graph_embeddings(request)
+
+            print("Send response...", flush=True)
+            r = GraphEmbeddingsResponse(entities=entities, error=None)
+            await flow("response").send(r, properties={"id": id})
+            print("Done.", flush=True)
+
+        except Exception as e:
+
+            print(f"Exception: {e}")
+            print("Send error response...", flush=True)
+
+            # The payload field is `entities`, as on the success path
+            r = GraphEmbeddingsResponse(
+                error=Error(
+                    type = "graph-embeddings-query-error",
+                    message = str(e),
+                ),
+                entities=None,
+            )
+            await flow("response").send(r, properties={"id": id})
+
+    @staticmethod
+    def add_args(parser):
+
+        FlowProcessor.add_args(parser)
+
+def run():
+    # No `Processor` exists in this module; launch the class defined here
+    GraphEmbeddingsQueryService.launch(default_ident, __doc__)
+
diff --git a/trustgraph-base/trustgraph/base/graph_embeddings_store_service.py b/trustgraph-base/trustgraph/base/graph_embeddings_store_service.py
new file mode 100644
index 00000000..911b90c1
--- /dev/null
+++ b/trustgraph-base/trustgraph/base/graph_embeddings_store_service.py
@@ -0,0 +1,50 @@
+
+"""
+Graph embeddings store base class
+"""
+
+from .. schema import GraphEmbeddings
+from .. base import FlowProcessor, ConsumerSpec
+from .. exceptions import TooManyRequests
+
+default_ident = "graph-embeddings-write"
+
+class GraphEmbeddingsStoreService(FlowProcessor):
+    """Base store processor; subclasses provide store_graph_embeddings()."""
+
+    def __init__(self, **params):
+        """Initialise the processor and declare its single "input" consumer."""
+
+        # Guarantee an "id" key is passed up (None when the caller gave none)
+        settings = params | { "id": params.get("id") }
+        super(GraphEmbeddingsStoreService, self).__init__(**settings)
+
+        input_spec = ConsumerSpec(
+            name = "input",
+            schema = GraphEmbeddings,
+            handler = self.on_message,
+        )
+        self.register_specification(input_spec)
+
+    async def on_message(self, msg, consumer, flow):
+        """Store the embeddings carried by `msg`.
+
+        Every exception is passed on to the caller. TooManyRequests passes
+        through silently; anything else is printed first.
+        """
+
+        try:
+
+            request = msg.value()
+            await self.store_graph_embeddings(request)
+
+        except Exception as e:
+            if not isinstance(e, TooManyRequests):
+                print(f"Exception: {e}")
+            raise e
+
+    @staticmethod
+    def add_args(parser):
+        """Register command-line arguments via FlowProcessor.add_args."""
+        FlowProcessor.add_args(parser)
+
diff --git a/trustgraph-base/trustgraph/base/graph_rag_client.py b/trustgraph-base/trustgraph/base/graph_rag_client.py
new file mode 100644
index 00000000..c4f3f7ab
--- /dev/null
+++ b/trustgraph-base/trustgraph/base/graph_rag_client.py
@@ -0,0 +1,33 @@
+
+from . request_response_spec import RequestResponse, RequestResponseSpec
+from .. schema import GraphRagQuery, GraphRagResponse
+
+class GraphRagClient(RequestResponse):
+    """Request/response client for the graph RAG service."""
+
+    async def rag(self, query, user="trustgraph", collection="default",
+                  timeout=600):
+        """Send `query` and return the reply's `response` field.
+
+        Raises RuntimeError if the reply has its error field set.
+        """
+        question = GraphRagQuery(
+            query = query, user = user, collection = collection,
+        )
+        reply = await self.request(question, timeout=timeout)
+        if reply.error:
+            raise RuntimeError(reply.error.message)
+        return reply.response
+
+class GraphRagClientSpec(RequestResponseSpec):
+    """Spec binding the graph RAG schemas to GraphRagClient."""
+    def __init__(self, request_name, response_name):
+        wiring = dict(
+            request_name = request_name,
+            request_schema = GraphRagQuery,
+            response_name = response_name,
+            response_schema = GraphRagResponse,
+            impl = GraphRagClient,
+        )
+        super().__init__(**wiring)
+
diff --git a/trustgraph-base/trustgraph/base/llm_service.py b/trustgraph-base/trustgraph/base/llm_service.py
new file mode 100644
index 00000000..c79b819b
--- /dev/null
+++ b/trustgraph-base/trustgraph/base/llm_service.py
@@ -0,0 +1,119 @@
+
+"""
+LLM text completion base class
+"""
+
+import time
+from prometheus_client import Histogram
+
+from .. schema import TextCompletionRequest, TextCompletionResponse, Error
+from .. exceptions import TooManyRequests
+from .. base import FlowProcessor, ConsumerSpec, ProducerSpec
+
+default_ident = "text-completion"
+
+class LlmResult:
+    """Slot-only result holder: text, in_token, out_token and model."""
+    __slots__ = ["text", "in_token", "out_token", "model"]
+
+    def __init__(self, text=None, in_token=None, out_token=None, model=None):
+        self.text, self.in_token = text, in_token
+        self.out_token, self.model = out_token, model
+
+class LlmService(FlowProcessor):
+    """Base text-completion processor; subclasses provide generate_content()."""
+
+    def __init__(self, **params):
+
+        id = params.get("id")
+
+        super(LlmService, self).__init__(**params | { "id": id })
+
+        self.register_specification(
+            ConsumerSpec(
+                name = "request",
+                schema = TextCompletionRequest,
+                handler = self.on_request
+            )
+        )
+
+        self.register_specification(
+            ProducerSpec(
+                name = "response",
+                schema = TextCompletionResponse
+            )
+        )
+
+        if not hasattr(__class__, "text_completion_metric"):
+            __class__.text_completion_metric = Histogram(
+                'text_completion_duration',
+                'Text completion duration (seconds)',
+                ["id", "flow"],
+                buckets=[
+                    0.25, 0.5, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0,
+                    8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+                    17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0,
+                    30.0, 35.0, 40.0, 45.0, 50.0, 60.0, 80.0, 100.0,
+                    120.0
+                ]
+            )
+
+    async def on_request(self, msg, consumer, flow):
+        """Complete one request; rate limits re-raise, other errors are sent."""
+
+        # Sender-produced ID. Read before the try block so the error path
+        # below can always echo it back to the requester.
+        id = msg.properties()["id"]
+
+        try:
+
+            request = msg.value()
+
+            with __class__.text_completion_metric.labels(
+                    id=self.id,
+                    flow=f"{flow.name}-{consumer.name}",
+            ).time():
+
+                response = await self.generate_content(
+                    request.system, request.prompt
+                )
+
+            await flow("response").send(
+                TextCompletionResponse(
+                    error=None,
+                    response=response.text,
+                    in_token=response.in_token,
+                    out_token=response.out_token,
+                    model=response.model
+                ),
+                properties={"id": id}
+            )
+
+        except TooManyRequests as e:
+            raise e
+
+        except Exception as e:
+
+            # Apart from rate limits, treat all exceptions as unrecoverable
+            print(f"Exception: {e}")
+            print("Send error response...", flush=True)
+
+            await flow("response").send(
+                TextCompletionResponse(
+                    error=Error(
+                        type = "llm-error",
+                        message = str(e),
+                    ),
+                    response=None,
+                    in_token=None,
+                    out_token=None,
+                    model=None,
+                ),
+                properties={"id": id}
+            )
+
+    @staticmethod
+    def add_args(parser):
+
+        FlowProcessor.add_args(parser)
+
diff --git a/trustgraph-base/trustgraph/base/metrics.py b/trustgraph-base/trustgraph/base/metrics.py
new file mode 100644
index 00000000..4ffbac9c
--- /dev/null
+++ b/trustgraph-base/trustgraph/base/metrics.py
@@ -0,0 +1,136 @@
+
+from prometheus_client import start_http_server, Info, Enum, Histogram
+from prometheus_client import Counter
+
+class ConsumerMetrics:
+    """Prometheus metrics for one consumer, labelled processor/flow/name."""
+
+    def __init__(self, processor, flow, name):
+
+        self.processor, self.flow, self.name = processor, flow, name
+
+        # The metric objects are class attributes, created the first time
+        # an instance is built and then reused; instances differ only in
+        # the label values they apply.
+        cls = __class__
+        base = ["processor", "flow", "name"]
+
+        if not hasattr(cls, "state_metric"):
+            cls.state_metric = Enum(
+                'consumer_state', 'Consumer state', base,
+                states=['stopped', 'running']
+            )
+
+        if not hasattr(cls, "request_metric"):
+            cls.request_metric = Histogram(
+                'request_latency', 'Request latency (seconds)', base,
+            )
+
+        if not hasattr(cls, "processing_metric"):
+            cls.processing_metric = Counter(
+                'processing_count', 'Processing count', base + ["status"],
+            )
+
+        if not hasattr(cls, "rate_limit_metric"):
+            cls.rate_limit_metric = Counter(
+                'rate_limit_count', 'Rate limit event count', base,
+            )
+
+    def _tags(self):
+        # Label values shared by every metric of this consumer
+        return dict(processor=self.processor, flow=self.flow, name=self.name)
+
+    def process(self, status):
+        """Count one processed item under the given status label."""
+        __class__.processing_metric.labels(status=status, **self._tags()).inc()
+
+    def rate_limit(self):
+        """Count one rate-limit event."""
+        __class__.rate_limit_metric.labels(**self._tags()).inc()
+
+    def state(self, state):
+        """Set the consumer state enum (declared states: stopped, running)."""
+        __class__.state_metric.labels(**self._tags()).state(state)
+
+    def record_time(self):
+        """Return the labelled request_metric's .time() object."""
+        return __class__.request_metric.labels(**self._tags()).time()
+
+class ProducerMetrics:
+    """Prometheus counter of items produced, labelled processor/flow/name."""
+
+    def __init__(self, processor, flow, name):
+        self.processor, self.flow, self.name = processor, flow, name
+
+        # One Counter shared by the whole class, created on first construction
+        cls = __class__
+        if not hasattr(cls, "producer_metric"):
+            cls.producer_metric = Counter(
+                'producer_count', 'Output items produced',
+                ["processor", "flow", "name"],
+            )
+
+    def inc(self):
+        """Add one to this producer's labelled counter."""
+        tags = dict(processor=self.processor, flow=self.flow, name=self.name)
+        __class__.producer_metric.labels(**tags).inc()
+
+class ProcessorMetrics:
+    """Prometheus Info metric ('Processor configuration') for one processor."""
+    def __init__(self, processor):
+        self.processor = processor
+        # One Info metric shared by the whole class, created on first use
+        cls = __class__
+        if not hasattr(cls, "processor_metric"):
+            cls.processor_metric = Info(
+                'processor', 'Processor configuration', ["processor"]
+            )
+
+    def info(self, info):
+        """Publish `info` under this processor's label."""
+        labelled = __class__.processor_metric.labels(processor = self.processor)
+        labelled.info(info)
+
+class SubscriberMetrics:
+    """Prometheus metrics for one subscriber, labelled processor/flow/name."""
+    def __init__(self, processor, flow, name):
+        self.processor = processor
+        self.flow = flow
+        self.name = name
+
+        if not hasattr(__class__, "state_metric"):
+            __class__.state_metric = Enum(
+                'subscriber_state', 'Subscriber state',
+                ["processor", "flow", "name"],
+                states=['stopped', 'running']
+            )
+
+        if not hasattr(__class__, "received_metric"):
+            __class__.received_metric = Counter(
+                'received_count', 'Received count',
+                ["processor", "flow", "name"],
+            )
+
+        if not hasattr(__class__, "dropped_metric"):
+            __class__.dropped_metric = Counter(
+                'dropped_count', 'Dropped messages count',
+                ["processor", "flow", "name"],
+            )
+
+    def received(self):
+        __class__.received_metric.labels(
+            processor = self.processor, flow = self.flow, name = self.name,
+        ).inc()
+
+    def state(self, state):
+        """Set the subscriber state enum (declared states: stopped, running)."""
+        __class__.state_metric.labels(
+            processor = self.processor, flow = self.flow, name = self.name,
+        ).state(state)
+
+    def dropped(self, state=None):
+        # `state` is unused; kept (now optional) so existing callers still work
+        __class__.dropped_metric.labels(
+            processor = self.processor, flow = self.flow, name = self.name,
+        ).inc()
+
diff --git a/trustgraph-base/trustgraph/base/producer.py b/trustgraph-base/trustgraph/base/producer.py
index bc2d7791..550855b8 100644
--- a/trustgraph-base/trustgraph/base/producer.py
+++ b/trustgraph-base/trustgraph/base/producer.py
@@ -1,56 +1,74 @@
from pulsar.schema import JsonSchema
-from prometheus_client import Info, Counter
+import asyncio
-from . base_processor import BaseProcessor
+class Producer:
-class Producer(BaseProcessor):
+    """Sends messages to one Pulsar topic, connecting on demand in send()."""
+    def __init__(self, client, topic, schema, metrics=None,
+                 chunking_enabled=True):
- def __init__(self, **params):
+        self.client = client
+        self.topic = topic
+        self.schema = schema
- output_queue = params.get("output_queue")
- output_schema = params.get("output_schema")
+        self.metrics = metrics
- if not hasattr(__class__, "output_metric"):
- __class__.output_metric = Counter(
- 'output_count', 'Output items created'
- )
+        self.running = True
+        # Created by send() the first time it is needed
+        self.producer = None
- if not hasattr(__class__, "pubsub_metric"):
- __class__.pubsub_metric = Info(
- 'pubsub', 'Pub/sub configuration'
- )
+        self.chunking_enabled = chunking_enabled
- __class__.pubsub_metric.info({
- "output_queue": output_queue,
- "output_schema": output_schema.__name__,
- })
+    def __del__(self):
- super(Producer, self).__init__(**params)
+        self.running = False
- if output_schema == None:
- raise RuntimeError("output_schema must be specified")
+        # hasattr guards against __init__ having failed before the attribute
+        # was assigned
+        if hasattr(self, "producer"):
+            if self.producer:
+                self.producer.close()
- self.producer = self.client.create_producer(
- topic=output_queue,
- schema=JsonSchema(output_schema),
- chunking_enabled=True,
- )
+    async def start(self):
+        self.running = True
+
+    async def stop(self):
+        self.running = False
async def send(self, msg, properties={}):
- self.producer.send(msg, properties)
- __class__.output_metric.inc()
- @staticmethod
- def add_args(
- parser, default_input_queue, default_subscriber,
- default_output_queue,
- ):
+        """Send `msg`, (re)connecting as needed until it is delivered.
+
+        Returns without sending if the producer is, or becomes, stopped.
+        """
+        if not self.running: return
- BaseProcessor.add_args(parser)
+        while self.running:
+
+            try:
+
+                # Connect inside the retry loop: a failed send discards the
+                # producer below, so the next pass must be able to reconnect
+                if self.producer is None:
+                    print("Connect publisher to", self.topic, "...", flush=True)
+                    self.producer = self.client.create_producer(
+                        topic = self.topic,
+                        schema = JsonSchema(self.schema),
+                        chunking_enabled = self.chunking_enabled,
+                    )
+                    print("Connected to", self.topic, flush=True)
+
+                await asyncio.to_thread(
+                    self.producer.send,
+                    msg, properties
+                )
+
+                if self.metrics:
+                    self.metrics.inc()
+
+                # Delivery success, break out of loop
+                break
+
+            except Exception as e:
+
+                print("Exception:", e, flush=True)
+
+                # Drop the connection; closing it is best-effort
+                if self.producer is not None:
+                    try:
+                        self.producer.close()
+                    except Exception as ce:
+                        print("Exception:", ce, flush=True)
+                    self.producer = None
+
+                # Back off before reconnecting and retrying
+                await asyncio.sleep(2)
- parser.add_argument(
- '-o', '--output-queue',
- default=default_output_queue,
- help=f'Output queue (default: {default_output_queue})'
- )
diff --git a/trustgraph-base/trustgraph/base/producer_spec.py b/trustgraph-base/trustgraph/base/producer_spec.py
new file mode 100644
index 00000000..9c8bbc6a
--- /dev/null
+++ b/trustgraph-base/trustgraph/base/producer_spec.py
@@ -0,0 +1,25 @@
+
+from . producer import Producer
+from . metrics import ProducerMetrics
+from . spec import Spec
+
+class ProducerSpec(Spec):
+    """Specification for a named flow producer and its schema."""
+
+    def __init__(self, name, schema):
+        self.name, self.schema = name, schema
+
+    def add(self, flow, processor, definition):
+        """Create this spec's Producer and register it as flow.producer[name]."""
+        key = self.name
+        stats = ProducerMetrics(
+            processor = flow.id, flow = flow.name, name = key
+        )
+
+        flow.producer[key] = Producer(
+            client = processor.pulsar_client,
+            topic = definition[key],
+            schema = self.schema,
+            metrics = stats,
+        )
+
diff --git a/trustgraph-base/trustgraph/base/prompt_client.py b/trustgraph-base/trustgraph/base/prompt_client.py
new file mode 100644
index 00000000..9e8ab033
--- /dev/null
+++ b/trustgraph-base/trustgraph/base/prompt_client.py
@@ -0,0 +1,93 @@
+
+import json
+
+from . request_response_spec import RequestResponse, RequestResponseSpec
+from .. schema import PromptRequest, PromptResponse
+
+class PromptClient(RequestResponse):
+    """Request/response client for the prompt service."""
+
+    async def prompt(self, id, variables, timeout=600):
+        """Invoke prompt `id` with `variables`.
+
+        Each variable value is JSON-encoded before sending. Returns the
+        response's text when it is truthy, otherwise the JSON-decoded
+        `object` field. Raises RuntimeError on a service error.
+        """
+
+        encoded = {}
+        for key, value in variables.items():
+            encoded[key] = json.dumps(value)
+
+        reply = await self.request(
+            PromptRequest(id = id, terms = encoded),
+            timeout=timeout
+        )
+
+        if reply.error:
+            raise RuntimeError(reply.error.message)
+
+        text = reply.text
+        return text if text else json.loads(reply.object)
+
+    async def extract_definitions(self, text, timeout=600):
+        """Run the "extract-definitions" prompt over `text`."""
+
+        terms = { "text": text }
+        return await self.prompt(
+            id = "extract-definitions", variables = terms, timeout = timeout,
+        )
+
+    async def extract_relationships(self, text, timeout=600):
+        """Run the "extract-relationships" prompt over `text`."""
+
+        terms = { "text": text }
+        return await self.prompt(
+            id = "extract-relationships", variables = terms, timeout = timeout,
+        )
+
+    async def kg_prompt(self, query, kg, timeout=600):
+        """Run the "kg-prompt" prompt; `kg` items are indexed as s, p, o."""
+
+        triples = []
+        for edge in kg:
+            triples.append({ "s": edge[0], "p": edge[1], "o": edge[2] })
+        terms = { "query": query, "knowledge": triples }
+        return await self.prompt(
+            id = "kg-prompt", variables = terms, timeout = timeout,
+        )
+
+    async def document_prompt(self, query, documents, timeout=600):
+        """Run the "document-prompt" prompt with `query` and `documents`."""
+
+        terms = { "query": query, "documents": documents }
+        return await self.prompt(
+            id = "document-prompt", variables = terms, timeout = timeout,
+        )
+
+    async def agent_react(self, variables, timeout=600):
+        """Run the "agent-react" prompt with caller-supplied variables."""
+        return await self.prompt(
+            id = "agent-react", variables = variables, timeout = timeout,
+        )
+
+    async def question(self, question, timeout=600):
+        """Run the "question" prompt for `question`."""
+
+        terms = { "question": question }
+        return await self.prompt(
+            id = "question", variables = terms, timeout = timeout,
+        )
+
+class PromptClientSpec(RequestResponseSpec):
+    """Spec binding the prompt schemas to PromptClient."""
+    def __init__(self, request_name, response_name):
+        wiring = dict(
+            request_name = request_name,
+            request_schema = PromptRequest,
+            response_name = response_name,
+            response_schema = PromptResponse,
+            impl = PromptClient,
+        )
+        super().__init__(**wiring)
+
diff --git a/trustgraph-base/trustgraph/base/publisher.py b/trustgraph-base/trustgraph/base/publisher.py
index 2da63331..ef963e84 100644
--- a/trustgraph-base/trustgraph/base/publisher.py
+++ b/trustgraph-base/trustgraph/base/publisher.py
@@ -1,47 +1,59 @@
-import queue
+from pulsar.schema import JsonSchema
+
+import asyncio
import time
import pulsar
-import threading
class Publisher:
- def __init__(self, pulsar_client, topic, schema=None, max_size=10,
+ def __init__(self, client, topic, schema=None, max_size=10,
chunking_enabled=True):
- self.client = pulsar_client
+ self.client = client
self.topic = topic
self.schema = schema
- self.q = queue.Queue(maxsize=max_size)
+ self.q = asyncio.Queue(maxsize=max_size)
self.chunking_enabled = chunking_enabled
self.running = True
+ self.task = None
- def start(self):
- self.task = threading.Thread(target=self.run)
- self.task.start()
+ async def start(self):
+ self.task = asyncio.create_task(self.run())
- def stop(self):
+ async def stop(self):
self.running = False
- def join(self):
- self.stop()
- self.task.join()
+ if self.task:
+ await self.task
- def run(self):
+ async def join(self):
+ await self.stop()
+
+ if self.task:
+ await self.task
+
+ async def run(self):
while self.running:
try:
+
producer = self.client.create_producer(
topic=self.topic,
- schema=self.schema,
+ schema=JsonSchema(self.schema),
chunking_enabled=self.chunking_enabled,
)
while self.running:
try:
- id, item = self.q.get(timeout=0.5)
- except queue.Empty:
+ id, item = await asyncio.wait_for(
+ self.q.get(),
+ timeout=0.25
+ )
+ except asyncio.TimeoutError:
+ continue
+ except asyncio.QueueEmpty:
continue
if id:
@@ -52,10 +64,12 @@ class Publisher:
except Exception as e:
print("Exception:", e, flush=True)
+ if not self.running:
+ return
+
# If handler drops out, sleep a retry
- time.sleep(2)
+ await asyncio.sleep(1)
- def send(self, id, msg):
- self.q.put((id, msg))
+ async def send(self, id, item):
+ await self.q.put((id, item))
-
diff --git a/trustgraph-base/trustgraph/base/pubsub.py b/trustgraph-base/trustgraph/base/pubsub.py
new file mode 100644
index 00000000..b9f233d4
--- /dev/null
+++ b/trustgraph-base/trustgraph/base/pubsub.py
@@ -0,0 +1,80 @@
+
+import os
+import pulsar
+import uuid
+from pulsar.schema import JsonSchema
+
+from .. log_level import LogLevel
+
+class PulsarClient:
+    """Owns a pulsar.Client configured from keyword params/environment."""
+    default_pulsar_host = os.getenv("PULSAR_HOST", 'pulsar://pulsar:6650')
+    default_pulsar_api_key = os.getenv("PULSAR_API_KEY", None)
+
+    def __init__(self, **params):
+        """Read pulsar_host, pulsar_listener, pulsar_api_key, log_level."""
+        self.client = None
+
+        pulsar_host = params.get("pulsar_host", self.default_pulsar_host)
+        pulsar_listener = params.get("pulsar_listener", None)
+        pulsar_api_key = params.get(
+            "pulsar_api_key",
+            self.default_pulsar_api_key
+        )
+        log_level = params.get("log_level", LogLevel.INFO)
+
+        self.pulsar_host = pulsar_host
+        self.pulsar_api_key = pulsar_api_key
+        self.pulsar_listener = pulsar_listener
+
+        # Token auth is optional.  The listener name is passed in both
+        # cases; previously it was silently dropped whenever an API key
+        # was configured.
+        auth = None
+        if pulsar_api_key:
+            auth = pulsar.AuthenticationToken(pulsar_api_key)
+
+        self.client = pulsar.Client(
+            pulsar_host,
+            authentication=auth,
+            listener_name=pulsar_listener,
+            logger=pulsar.ConsoleLogger(log_level.to_pulsar())
+        )
+
+    def close(self):
+        """Close the client once; later calls (and __del__) do nothing."""
+        client, self.client = getattr(self, "client", None), None
+        if client:
+            client.close()
+
+    def __del__(self):
+        # __init__ may have failed before self.client was set/connected.
+        self.close()
+
+    @staticmethod
+    def add_args(parser):
+        """Add the Pulsar connection and log-level options to parser."""
+        parser.add_argument(
+            '-p', '--pulsar-host',
+            default=__class__.default_pulsar_host,
+            help=f'Pulsar host (default: {__class__.default_pulsar_host})',
+        )
+
+        parser.add_argument(
+            '--pulsar-api-key',
+            default=__class__.default_pulsar_api_key,
+            help='Pulsar API key',
+        )
+
+        parser.add_argument(
+            '--pulsar-listener',
+            help='Pulsar listener (default: none)',
+        )
+
+        parser.add_argument(
+            '-l', '--log-level',
+            type=LogLevel,
+            default=LogLevel.INFO,
+            choices=list(LogLevel),
+            help='Log level (default: info)'
+        )
diff --git a/trustgraph-base/trustgraph/base/request_response_spec.py b/trustgraph-base/trustgraph/base/request_response_spec.py
new file mode 100644
index 00000000..7b8b1be8
--- /dev/null
+++ b/trustgraph-base/trustgraph/base/request_response_spec.py
@@ -0,0 +1,143 @@
+
+import uuid
+import asyncio
+
+from . subscriber import Subscriber
+from . producer import Producer
+from . spec import Spec
+from . metrics import ConsumerMetrics, ProducerMetrics, SubscriberMetrics
+
+class RequestResponse(Subscriber):
+    """Request/response client: a Producer for requests plus, through the
+    Subscriber base class, per-request queues of responses matched on
+    the message "id" property."""
+
+    def __init__(
+            self, client, subscription, consumer_name,
+            request_topic, request_schema,
+            request_metrics,
+            response_topic, response_schema,
+            response_metrics,
+    ):
+        # The Subscriber side listens on the response topic ...
+        super(RequestResponse, self).__init__(
+            client = client,
+            subscription = subscription,
+            consumer_name = consumer_name,
+            topic = response_topic,
+            schema = response_schema,
+            metrics = response_metrics,
+        )
+
+        # ... and the Producer side publishes on the request topic.
+        self.producer = Producer(
+            client = client,
+            topic = request_topic,
+            schema = request_schema,
+            metrics = request_metrics,
+        )
+
+    async def start(self):
+        """Start the request producer, then the response subscriber."""
+        await self.producer.start()
+        await super(RequestResponse, self).start()
+
+    async def stop(self):
+        """Stop the request producer, then the response subscriber."""
+        await self.producer.stop()
+        await super(RequestResponse, self).stop()
+
+    async def request(self, req, timeout=300, recipient=None):
+        """Send req and wait for the response(s) carrying the same id.
+
+        timeout is the wait, in seconds, applied to each response
+        separately.  Without a recipient the first response is returned.
+        With one, recipient(resp) is awaited for every response and the
+        first response for which it returns a true value is returned.
+        Failures, including the wait timing out, are printed and
+        re-raised.
+        """
+        id = str(uuid.uuid4())
+
+        print("Request", id, "...", flush=True)
+
+        q = await self.subscribe(id)
+
+        # Everything after subscribe() sits inside try/finally so that the
+        # per-request queue is removed even when the send itself fails.
+        try:
+            await self.producer.send(
+                req,
+                properties={"id": id}
+            )
+
+            while True:
+
+                resp = await asyncio.wait_for(
+                    q.get(),
+                    timeout=timeout
+                )
+
+                print("Got response.", flush=True)
+
+                # If no recipient handler, just return the first
+                # response we get
+                if recipient is None:
+                    return resp
+
+                # Recipient handler gets to decide when we're done by
+                # returning a boolean.  If done, return the last result,
+                # otherwise loop round for the next response.
+                if await recipient(resp):
+                    return resp
+
+        except Exception as e:
+
+            print("Exception:", e)
+            raise
+
+        finally:
+
+            await self.unsubscribe(id)
+
+# This deals with the request/response case.  The caller needs to
+# use another service in request/response mode.  Uses two topics:
+# - we send on the request topic as a producer
+# - we receive on the response topic as a subscriber
+class RequestResponseSpec(Spec):
+    """Describes a request/response client to be attached to a flow."""
+    def __init__(
+            self, request_name, request_schema, response_name,
+            response_schema, impl=RequestResponse
+    ):
+        self.request_name = request_name
+        self.request_schema = request_schema
+        self.response_name = response_name
+        self.response_schema = response_schema
+        self.impl = impl
+
+    def add(self, flow, processor, definition):
+        """Build the client and store it in flow.consumer[request_name]."""
+        # NOTE(review): both metrics objects are labelled with the
+        # request name - confirm that is intended for the response side.
+        labels = {
+            "processor": flow.id, "flow": flow.name, "name": self.request_name
+        }
+        producer_metrics = ProducerMetrics(**labels)
+        subscriber_metrics = SubscriberMetrics(**labels)
+
+        # Topic names are looked up in the flow definition by spec name
+        flow.consumer[self.request_name] = self.impl(
+            client = processor.pulsar_client,
+            subscription = (
+                processor.id + "--" + flow.name + "--" + self.request_name
+            ),
+            consumer_name = flow.id,
+            request_topic = definition[self.request_name],
+            request_schema = self.request_schema,
+            request_metrics = producer_metrics,
+            response_topic = definition[self.response_name],
+            response_schema = self.response_schema,
+            response_metrics = subscriber_metrics,
+        )
+
diff --git a/trustgraph-base/trustgraph/base/setting_spec.py b/trustgraph-base/trustgraph/base/setting_spec.py
new file mode 100644
index 00000000..5c5152b2
--- /dev/null
+++ b/trustgraph-base/trustgraph/base/setting_spec.py
@@ -0,0 +1,19 @@
+
+from . spec import Spec
+
+class Setting:
+    """Holds one configuration value; start/stop are deliberate no-ops."""
+    def __init__(self, value):
+        self.value = value
+    # start/stop take self so they can be awaited on an instance
+    async def start(self): pass
+    async def stop(self): pass
+
+class SettingSpec(Spec):
+    """Spec that wraps one named definition entry in a Setting."""
+    def __init__(self, name):
+        self.name = name
+    def add(self, flow, processor, definition):
+        key = self.name
+        flow.config[key] = Setting(definition[key])
+
diff --git a/trustgraph-base/trustgraph/base/spec.py b/trustgraph-base/trustgraph/base/spec.py
new file mode 100644
index 00000000..4d0d937b
--- /dev/null
+++ b/trustgraph-base/trustgraph/base/spec.py
@@ -0,0 +1,4 @@
+
+class Spec:
+    """Common base class for flow specifications."""
+
diff --git a/trustgraph-base/trustgraph/base/subscriber.py b/trustgraph-base/trustgraph/base/subscriber.py
index 30ade3ee..8467b0bf 100644
--- a/trustgraph-base/trustgraph/base/subscriber.py
+++ b/trustgraph-base/trustgraph/base/subscriber.py
@@ -1,14 +1,14 @@
-import queue
-import pulsar
-import threading
+from pulsar.schema import JsonSchema
+import asyncio
+import _pulsar
import time
class Subscriber:
- def __init__(self, pulsar_client, topic, subscription, consumer_name,
- schema=None, max_size=100):
- self.client = pulsar_client
+ def __init__(self, client, topic, subscription, consumer_name,
+ schema=None, max_size=100, metrics=None):
+ self.client = client
self.topic = topic
self.subscription = subscription
self.consumer_name = consumer_name
@@ -16,38 +16,73 @@ class Subscriber:
self.q = {}
self.full = {}
self.max_size = max_size
- self.lock = threading.Lock()
+ self.lock = asyncio.Lock()
self.running = True
+ self.metrics = metrics
+ self.task = None
- def start(self):
- self.task = threading.Thread(target=self.run)
- self.task.start()
+ self.consumer = None
+
+ def __del__(self):
- def stop(self):
self.running = False
- def join(self):
- self.task.join()
+ async def start(self):
- def run(self):
+ self.consumer = self.client.subscribe(
+ topic = self.topic,
+ subscription_name = self.subscription,
+ consumer_name = self.consumer_name,
+ schema = JsonSchema(self.schema),
+ )
+
+ self.task = asyncio.create_task(self.run())
+
+ async def stop(self):
+ self.running = False
+
+ if self.task:
+ await self.task
+
+ async def join(self):
+ await self.stop()
+
+ if self.task:
+ await self.task
+
+ async def run(self):
while self.running:
+ if self.metrics:
+ self.metrics.state("stopped")
+
try:
- consumer = self.client.subscribe(
- topic=self.topic,
- subscription_name=self.subscription,
- consumer_name=self.consumer_name,
- schema=self.schema,
- )
+ if self.metrics:
+ self.metrics.state("running")
+
+ print("Subscriber running...", flush=True)
while self.running:
- msg = consumer.receive()
+ try:
+ msg = await asyncio.to_thread(
+ self.consumer.receive,
+ timeout_millis=250
+ )
+ except _pulsar.Timeout:
+ continue
+ except Exception as e:
+ print("Exception:", e, flush=True)
+ print(type(e))
+ raise e
+
+ if self.metrics:
+ self.metrics.received()
# Acknowledge successful reception of the message
- consumer.acknowledge(msg)
+ self.consumer.acknowledge(msg)
try:
id = msg.properties()["id"]
@@ -56,57 +91,83 @@ class Subscriber:
value = msg.value()
- with self.lock:
+ async with self.lock:
+
+ # FIXME: Hard-coded timeouts
if id in self.q:
+
try:
# FIXME: Timeout means data goes missing
- self.q[id].put(value, timeout=0.5)
- except:
- pass
+ await asyncio.wait_for(
+ self.q[id].put(value),
+ timeout=1
+ )
+
+ except Exception as e:
+ if self.metrics: self.metrics.dropped()  # metrics defaults to None
+ print("Q Put:", e, flush=True)
for q in self.full.values():
try:
# FIXME: Timeout means data goes missing
- q.put(value, timeout=0.5)
- except:
- pass
+ await asyncio.wait_for(
+ q.put(value),
+ timeout=1
+ )
+ except Exception as e:
+ if self.metrics: self.metrics.dropped()  # metrics defaults to None
+ print("Q Put:", e, flush=True)
except Exception as e:
- print("Exception:", e, flush=True)
+ print("Subscriber exception:", e, flush=True)
+
+ finally:
+
+ if self.consumer:
+ self.consumer.unsubscribe()
+ self.consumer.close()
+ self.consumer = None
+
+ if self.metrics:
+ self.metrics.state("stopped")
+
+ if not self.running:
+ return
+
# If handler drops out, sleep a retry
- time.sleep(2)
+ await asyncio.sleep(1)
- def subscribe(self, id):
+ async def subscribe(self, id):
- with self.lock:
+ async with self.lock:
- q = queue.Queue(maxsize=self.max_size)
+ q = asyncio.Queue(maxsize=self.max_size)
self.q[id] = q
return q
- def unsubscribe(self, id):
+ async def unsubscribe(self, id):
- with self.lock:
+ async with self.lock:
if id in self.q:
# self.q[id].shutdown(immediate=True)
del self.q[id]
- def subscribe_all(self, id):
+ async def subscribe_all(self, id):
- with self.lock:
+ async with self.lock:
- q = queue.Queue(maxsize=self.max_size)
+ q = asyncio.Queue(maxsize=self.max_size)
self.full[id] = q
return q
- def unsubscribe_all(self, id):
+ async def unsubscribe_all(self, id):
- with self.lock:
+ async with self.lock:
if id in self.full:
# self.full[id].shutdown(immediate=True)
diff --git a/trustgraph-base/trustgraph/base/subscriber_spec.py b/trustgraph-base/trustgraph/base/subscriber_spec.py
new file mode 100644
index 00000000..7dca09db
--- /dev/null
+++ b/trustgraph-base/trustgraph/base/subscriber_spec.py
@@ -0,0 +1,30 @@
+
+from . metrics import SubscriberMetrics
+from . subscriber import Subscriber
+from . spec import Spec
+
+class SubscriberSpec(Spec):
+    """Spec for a plain Subscriber on the topic named by `name`."""
+
+    def __init__(self, name, schema):
+        self.name = name
+        self.schema = schema
+
+    def add(self, flow, processor, definition):
+        """Create the Subscriber and register it as flow.consumer[name]."""
+        key = self.name
+        metrics = SubscriberMetrics(
+            processor=flow.id, flow=flow.name, name=key
+        )
+
+        # Put it in the consumer map, does that work?
+        # It means it gets start/stop call.
+        flow.consumer[key] = Subscriber(
+            client=processor.pulsar_client,
+            topic=definition[key],
+            subscription=flow.id,
+            consumer_name=flow.id,
+            schema=self.schema,
+            metrics=metrics,
+        )
+
diff --git a/trustgraph-base/trustgraph/base/text_completion_client.py b/trustgraph-base/trustgraph/base/text_completion_client.py
new file mode 100644
index 00000000..aba2fada
--- /dev/null
+++ b/trustgraph-base/trustgraph/base/text_completion_client.py
@@ -0,0 +1,30 @@
+
+from . request_response_spec import RequestResponse, RequestResponseSpec
+from .. schema import TextCompletionRequest, TextCompletionResponse
+
+class TextCompletionClient(RequestResponse):
+    """Request/response client for text-completion requests."""
+    async def text_completion(self, system, prompt, timeout=600):
+        """Return the reply's response field; raise RuntimeError when the
+        reply has an error.  timeout is handed on to request()."""
+        query = TextCompletionRequest(system=system, prompt=prompt)
+        reply = await self.request(query, timeout=timeout)
+
+        # A populated error field takes precedence over any response
+        failure = reply.error
+        if failure:
+            raise RuntimeError(failure.message)
+        return reply.response
+
+class TextCompletionClientSpec(RequestResponseSpec):
+    """Spec wiring a TextCompletionClient to a request/response pair."""
+    def __init__(self, request_name, response_name):
+        # Schemas and client class are fixed; only the names vary.
+        super().__init__(
+            request_name=request_name,
+            request_schema=TextCompletionRequest,
+            response_name=response_name,
+            response_schema=TextCompletionResponse,
+            impl=TextCompletionClient,
+        )
+
diff --git a/trustgraph-base/trustgraph/base/triples_client.py b/trustgraph-base/trustgraph/base/triples_client.py
new file mode 100644
index 00000000..c9f747b5
--- /dev/null
+++ b/trustgraph-base/trustgraph/base/triples_client.py
@@ -0,0 +1,61 @@
+
+from . request_response_spec import RequestResponse, RequestResponseSpec
+from .. schema import TriplesQueryRequest, TriplesQueryResponse, Value
+from .. knowledge import Uri, Literal
+
+class Triple:
+    """Plain holder for the three parts (s, p, o) of one triple."""
+    def __init__(self, s, p, o):
+        # Stored exactly as given; no conversion happens here
+        self.s, self.p, self.o = s, p, o
+
+def to_value(x):
+    """Convert a schema value to Uri (when x.is_uri) or Literal."""
+    return Uri(x.value) if x.is_uri else Literal(x.value)
+
+def from_value(x):
+    """Build a schema Value from x; None stays None.  is_uri is True
+    only when x is a Uri instance."""
+    if x is None:
+        return None
+    return Value(value=str(x), is_uri=isinstance(x, Uri))
+
+class TriplesClient(RequestResponse):
+    """Request/response client for triples queries."""
+    async def query(self, s=None, p=None, o=None, limit=20,
+                    user="trustgraph", collection="default",
+                    timeout=30):
+        """Query triples and return them as a list of Triple objects.
+
+        s, p and o are converted with from_value(), so None is sent as
+        None.  Raises RuntimeError when the response has an error.
+        """
+        query = TriplesQueryRequest(
+            s = from_value(s),
+            p = from_value(p),
+            o = from_value(o),
+            limit = limit,
+            user = user,
+            collection = collection,
+        )
+        reply = await self.request(query, timeout=timeout)
+
+        if reply.error:
+            raise RuntimeError(reply.error.message)
+
+        def convert(t):
+            return Triple(to_value(t.s), to_value(t.p), to_value(t.o))
+        return [convert(t) for t in reply.triples]
+
+class TriplesClientSpec(RequestResponseSpec):
+    """Spec wiring a TriplesClient to a request/response pair."""
+    def __init__(self, request_name, response_name):
+        # Schemas and client class are fixed; only the names vary.
+        super().__init__(
+            request_name=request_name,
+            request_schema=TriplesQueryRequest,
+            response_name=response_name,
+            response_schema=TriplesQueryResponse,
+            impl=TriplesClient,
+        )
+
diff --git a/trustgraph-base/trustgraph/base/triples_query_service.py b/trustgraph-base/trustgraph/base/triples_query_service.py
new file mode 100644
index 00000000..37acc622
--- /dev/null
+++ b/trustgraph-base/trustgraph/base/triples_query_service.py
@@ -0,0 +1,82 @@
+
+"""
+Triples query service. Input is a (s, p, o) triple, some values may be
+null. Output is a list of triples.
+"""
+
+from .. schema import TriplesQueryRequest, TriplesQueryResponse, Error
+from .. schema import Value, Triple
+
+from . flow_processor import FlowProcessor
+from . consumer_spec import ConsumerSpec
+from . producer_spec import ProducerSpec
+
+default_ident = "triples-query"
+
+class TriplesQueryService(FlowProcessor):
+    """Base for triples query processors: consumes "request" and replies
+    on "response".  query_triples(request) is not defined here and is
+    presumably supplied by subclasses."""
+    def __init__(self, **params):
+        id = params.get("id")
+        super(TriplesQueryService, self).__init__(**params | { "id": id })
+
+        self.register_specification(
+            ConsumerSpec(
+                name = "request",
+                schema = TriplesQueryRequest,
+                handler = self.on_message
+            )
+        )
+
+        self.register_specification(
+            ProducerSpec(
+                name = "response",
+                schema = TriplesQueryResponse,
+            )
+        )
+
+    async def on_message(self, msg, consumer, flow):
+        """Run one query and send the result, or an error response, tagged
+        with the id property of the incoming message."""
+        # Sender-produced ID.  Read before the try block: the error path
+        # below needs it, and reading it inside the try left it unbound
+        # whenever msg.value() failed.
+        id = msg.properties()["id"]
+
+        try:
+            request = msg.value()
+
+            print(f"Handling input {id}...", flush=True)
+
+            triples = await self.query_triples(request)
+
+            print("Send response...", flush=True)
+            r = TriplesQueryResponse(triples=triples, error=None)
+            await flow("response").send(r, properties={"id": id})
+
+            print("Done.", flush=True)
+
+        except Exception as e:
+            print(f"Exception: {e}")
+            print("Send error response...", flush=True)
+
+            r = TriplesQueryResponse(
+                error = Error(
+                    type = "triples-query-error",
+                    message = str(e),
+                ),
+                triples = None,
+            )
+
+            await flow("response").send(r, properties={"id": id})
+
+    @staticmethod
+    def add_args(parser):
+        """Add the FlowProcessor command-line arguments."""
+        FlowProcessor.add_args(parser)
+
+def run():
+    # "Processor" is not defined in this module; launch the class that is.
+    TriplesQueryService.launch(default_ident, __doc__)
+
diff --git a/trustgraph-base/trustgraph/base/triples_store_service.py b/trustgraph-base/trustgraph/base/triples_store_service.py
new file mode 100644
index 00000000..74f95f57
--- /dev/null
+++ b/trustgraph-base/trustgraph/base/triples_store_service.py
@@ -0,0 +1,47 @@
+
+"""
+Triples store base class
+"""
+
+from .. schema import Triples
+from .. base import FlowProcessor, ConsumerSpec
+
+default_ident = "triples-write"
+
+class TriplesStoreService(FlowProcessor):
+    """Base for triples writers: consumes Triples messages on "input".
+    store_triples(message) is not defined here and is presumably
+    supplied by subclasses."""
+    def __init__(self, **params):
+        id = params.get("id")
+        super(TriplesStoreService, self).__init__(**params | { "id": id })
+
+        self.register_specification(
+            ConsumerSpec(
+                name = "input",
+                schema = Triples,
+                handler = self.on_message
+            )
+        )
+
+    async def on_message(self, msg, consumer, flow):
+        """Store one message; every failure is re-raised to the caller."""
+        # Imported here because this module never imported the name, so
+        # the except clause below raised NameError on any failure.
+        from .. exceptions import TooManyRequests
+
+        try:
+            request = msg.value()
+            await self.store_triples(request)
+        except TooManyRequests:
+            # Passed through without logging
+            raise
+        except Exception as e:
+            print(f"Exception: {e}")
+            raise
+
+    @staticmethod
+    def add_args(parser):
+        """Add the FlowProcessor command-line arguments."""
+        FlowProcessor.add_args(parser)
+
diff --git a/trustgraph-base/trustgraph/clients/base.py b/trustgraph-base/trustgraph/clients/base.py
index ac809123..25eac3b7 100644
--- a/trustgraph-base/trustgraph/clients/base.py
+++ b/trustgraph-base/trustgraph/clients/base.py
@@ -28,6 +28,7 @@ class BaseClient:
output_schema=None,
pulsar_host="pulsar://pulsar:6650",
pulsar_api_key=None,
+ listener=None,
):
if input_queue == None: raise RuntimeError("Need input_queue")
@@ -41,14 +42,16 @@ class BaseClient:
if pulsar_api_key:
auth = pulsar.AuthenticationToken(pulsar_api_key)
self.client = pulsar.Client(
- pulsar_host,
- logger=pulsar.ConsoleLogger(log_level),
- authentication=auth,
+ pulsar_host,
+ logger=pulsar.ConsoleLogger(log_level),
+ authentication=auth,
+ listener_name=listener,  # pulsar.Client has no "listener" keyword
)
else:
self.client = pulsar.Client(
- pulsar_host,
- logger=pulsar.ConsoleLogger(log_level)
+ pulsar_host,
+ logger=pulsar.ConsoleLogger(log_level),
+ listener_name=listener,
)
self.producer = self.client.create_producer(
diff --git a/trustgraph-base/trustgraph/clients/config_client.py b/trustgraph-base/trustgraph/clients/config_client.py
new file mode 100644
index 00000000..ed8c704a
--- /dev/null
+++ b/trustgraph-base/trustgraph/clients/config_client.py
@@ -0,0 +1,161 @@
+
+import _pulsar
+import json
+import dataclasses
+
+from .. schema import ConfigRequest, ConfigResponse, ConfigKey, ConfigValue
+from .. schema import config_request_queue
+from .. schema import config_response_queue
+from . base import BaseClient
+
+# Ugly
+ERROR=_pulsar.LoggerLevel.Error
+WARN=_pulsar.LoggerLevel.Warn
+INFO=_pulsar.LoggerLevel.Info
+DEBUG=_pulsar.LoggerLevel.Debug
+
+@dataclasses.dataclass
+class Definition:  # NOTE(review): not referenced by ConfigClient below
+    name: str
+    definition: str
+
+@dataclasses.dataclass
+class Relationship:  # NOTE(review): not referenced by ConfigClient below
+    s: str
+    p: str
+    o: str
+    o_entity: str
+
+@dataclasses.dataclass
+class Topic:  # NOTE(review): not referenced by ConfigClient below
+    name: str
+    definition: str
+
+class ConfigClient(BaseClient):
+    """Client for the config service; each method makes one call()."""
+    def __init__(
+            self, log_level=ERROR,
+            subscriber=None,
+            input_queue=None,
+            output_queue=None,
+            pulsar_host="pulsar://pulsar:6650",
+            listener=None,
+            pulsar_api_key=None,
+    ):
+
+        if input_queue is None:
+            input_queue = config_request_queue
+
+        if output_queue is None:
+            output_queue = config_response_queue
+
+        super(ConfigClient, self).__init__(
+            log_level=log_level,
+            subscriber=subscriber,
+            input_queue=input_queue,
+            output_queue=output_queue,
+            pulsar_host=pulsar_host,
+            pulsar_api_key=pulsar_api_key,
+            input_schema=ConfigRequest,
+            output_schema=ConfigResponse,
+            listener=listener,
+        )
+
+    def get(self, keys, timeout=300):
+        """Fetch values for keys, a list of {"type", "key"} dicts, and
+        return them as a list of {"type", "key", "value"} dicts."""
+        resp = self.call(
+            operation="get",
+            keys=[
+                ConfigKey(
+                    type = k["type"],
+                    key = k["key"]
+                )
+                for k in keys
+            ],
+            timeout=timeout
+        )
+
+        return [
+            {
+                "type": v.type,
+                "key": v.key,
+                "value": v.value
+            }
+            for v in resp.values
+        ]
+
+    def list(self, type, timeout=300):
+        """Run the "list" operation for one config type and return the
+        directory field of the response."""
+        resp = self.call(
+            operation="list",
+            type=type,
+            timeout=timeout
+        )
+
+        return resp.directory
+
+    def getvalues(self, type, timeout=300):
+        """Run "getvalues" for one config type and return the values as a
+        list of {"type", "key", "value"} dicts."""
+        resp = self.call(
+            operation="getvalues",
+            type=type,
+            timeout=timeout
+        )
+
+        return [
+            {
+                "type": v.type,
+                "key": v.key,
+                "value": v.value
+            }
+            for v in resp.values
+        ]
+
+    def delete(self, keys, timeout=300):
+        """Run "delete" for keys, a list of {"type", "key"} dicts.
+        Returns None; the response is not inspected here."""
+        self.call(
+            operation="delete",
+            keys=[
+                ConfigKey(
+                    type = k["type"],
+                    key = k["key"]
+                )
+                for k in keys
+            ],
+            timeout=timeout
+        )
+
+        return None
+
+    def put(self, values, timeout=300):
+        """Run "put" for values, a list of {"type", "key", "value"} dicts.
+        Returns None; the response is not inspected here."""
+        self.call(
+            operation="put",
+            values=[
+                ConfigValue(
+                    type = v["type"],
+                    key = v["key"],
+                    value = v["value"]
+                )
+                for v in values
+            ],
+            timeout=timeout
+        )
+
+        return None
+
+    def config(self, timeout=300):
+        """Run the "config" operation and return the response's
+        (config, version) pair."""
+        resp = self.call(
+            operation="config",
+            timeout=timeout
+        )
+
+        return resp.config, resp.version
+
diff --git a/trustgraph-base/trustgraph/clients/embeddings_client.py b/trustgraph-base/trustgraph/clients/embeddings_client.py
index 811f6ed2..1b1c0dc8 100644
--- a/trustgraph-base/trustgraph/clients/embeddings_client.py
+++ b/trustgraph-base/trustgraph/clients/embeddings_client.py
@@ -1,7 +1,6 @@
from pulsar.schema import JsonSchema
from .. schema import EmbeddingsRequest, EmbeddingsResponse
-from .. schema import embeddings_request_queue, embeddings_response_queue
from . base import BaseClient
import _pulsar
@@ -23,12 +22,6 @@ class EmbeddingsClient(BaseClient):
pulsar_api_key=None,
):
- if input_queue == None:
- input_queue=embeddings_request_queue
-
- if output_queue == None:
- output_queue=embeddings_response_queue
-
super(EmbeddingsClient, self).__init__(
log_level=log_level,
subscriber=subscriber,
@@ -43,4 +36,3 @@ class EmbeddingsClient(BaseClient):
def request(self, text, timeout=300):
return self.call(text=text, timeout=timeout).vectors
-
diff --git a/trustgraph-base/trustgraph/schema/__init__.py b/trustgraph-base/trustgraph/schema/__init__.py
index 9c44a743..957ebcbd 100644
--- a/trustgraph-base/trustgraph/schema/__init__.py
+++ b/trustgraph-base/trustgraph/schema/__init__.py
@@ -11,5 +11,7 @@ from . metadata import *
from . agent import *
from . lookup import *
from . library import *
-
+from . config import *
+from . flows import *
+from . knowledge import *
diff --git a/trustgraph-base/trustgraph/schema/agent.py b/trustgraph-base/trustgraph/schema/agent.py
index 9bcdde51..ee20a9aa 100644
--- a/trustgraph-base/trustgraph/schema/agent.py
+++ b/trustgraph-base/trustgraph/schema/agent.py
@@ -26,12 +26,5 @@ class AgentResponse(Record):
thought = String()
observation = String()
-agent_request_queue = topic(
- 'agent', kind='non-persistent', namespace='request'
-)
-agent_response_queue = topic(
- 'agent', kind='non-persistent', namespace='response'
-)
-
############################################################################
diff --git a/trustgraph-base/trustgraph/schema/config.py b/trustgraph-base/trustgraph/schema/config.py
new file mode 100644
index 00000000..3be63aa3
--- /dev/null
+++ b/trustgraph-base/trustgraph/schema/config.py
@@ -0,0 +1,71 @@
+
+from pulsar.schema import Record, Bytes, String, Boolean, Array, Map, Integer
+
+from . topic import topic
+from . types import Error
+
+############################################################################
+
+# Config service:
+# get(keys) -> (version, values)
+# list(type) -> (version, values)
+# getvalues(type) -> (version, values)
+# put(values) -> ()
+# delete(keys) -> ()
+# config() -> (version, config)
+class ConfigKey(Record):  # Names one config item by type and key
+    type = String()
+    key = String()
+
+class ConfigValue(Record):  # One config item: type, key and its value
+    type = String()
+    key = String()
+    value = String()
+
+# Config service request; the operation selects which fields are used
+class ConfigRequest(Record):
+    # Each field below is annotated with the operations that use it
+    operation = String()  # get, list, getvalues, delete, put, config
+
+    # get, delete
+    keys = Array(ConfigKey())
+
+    # list, getvalues
+    type = String()
+
+    # put
+    values = Array(ConfigValue())
+
+class ConfigResponse(Record):
+    # Each field below is annotated with the operations that return it
+    # get, list, getvalues, config
+    version = Integer()
+
+    # get, getvalues
+    values = Array(ConfigValue())
+
+    # list
+    directory = Array(String())
+
+    # config
+    config = Map(Map(String()))
+
+    # Everything
+    error = Error()
+
+class ConfigPush(Record):  # Version plus config map, as in ConfigResponse
+    version = Integer()
+    config = Map(Map(String()))
+
+config_request_queue = topic(
+ 'config', kind='non-persistent', namespace='request'
+)
+config_response_queue = topic(
+ 'config', kind='non-persistent', namespace='response'
+)
+config_push_queue = topic(
+ 'config', kind='persistent', namespace='config'
+)
+
+############################################################################
+
diff --git a/trustgraph-base/trustgraph/schema/documents.py b/trustgraph-base/trustgraph/schema/documents.py
index fd0049ee..e479371d 100644
--- a/trustgraph-base/trustgraph/schema/documents.py
+++ b/trustgraph-base/trustgraph/schema/documents.py
@@ -11,8 +11,6 @@ class Document(Record):
metadata = Metadata()
data = Bytes()
-document_ingest_queue = topic('document-load')
-
############################################################################
# Text documents / text from PDF
@@ -21,8 +19,6 @@ class TextDocument(Record):
metadata = Metadata()
text = Bytes()
-text_ingest_queue = topic('text-document-load')
-
############################################################################
# Chunks of text
@@ -31,8 +27,6 @@ class Chunk(Record):
metadata = Metadata()
chunk = Bytes()
-chunk_ingest_queue = topic('chunk-load')
-
############################################################################
# Document embeddings are embeddings associated with a chunk
@@ -46,8 +40,6 @@ class DocumentEmbeddings(Record):
metadata = Metadata()
chunks = Array(ChunkEmbeddings())
-document_embeddings_store_queue = topic('document-embeddings-store')
-
############################################################################
# Doc embeddings query
@@ -62,10 +54,3 @@ class DocumentEmbeddingsResponse(Record):
error = Error()
documents = Array(Bytes())
-document_embeddings_request_queue = topic(
- 'doc-embeddings', kind='non-persistent', namespace='request'
-)
-document_embeddings_response_queue = topic(
- 'doc-embeddings', kind='non-persistent', namespace='response',
-)
-
diff --git a/trustgraph-base/trustgraph/schema/flows.py b/trustgraph-base/trustgraph/schema/flows.py
new file mode 100644
index 00000000..28b90f5d
--- /dev/null
+++ b/trustgraph-base/trustgraph/schema/flows.py
@@ -0,0 +1,66 @@
+
+from pulsar.schema import Record, Bytes, String, Boolean, Array, Map, Integer
+
+from . topic import topic
+from . types import Error
+
+############################################################################
+
+# Flow service:
+# list_classes() -> (classname[])
+# get_class(classname) -> (class)
+# put_class(class) -> (class)
+# delete_class(classname) -> ()
+#
+# list_flows() -> (flowid[])
+# get_flow(flowid) -> (flow)
+# start_flow(flowid, classname) -> ()
+# stop_flow(flowid) -> ()
+
+# Flow service request; the operation selects which fields are used
+class FlowRequest(Record):
+    # Each field below is annotated with the operations that use it
+    operation = String()  # list-classes, get-class, put-class, delete-class
+                          # list-flows, get-flow, start-flow, stop-flow
+
+    # get_class, put_class, delete_class, start_flow
+    class_name = String()
+
+    # put_class
+    class_definition = String()
+
+    # start_flow
+    description = String()
+
+    # get_flow, start_flow, stop_flow
+    flow_id = String()
+
+class FlowResponse(Record):
+    # Each field below is annotated with the operations that return it
+    # list_classes
+    class_names = Array(String())
+
+    # list_flows
+    flow_ids = Array(String())
+
+    # get_class
+    class_definition = String()
+
+    # get_flow
+    flow = String()
+
+    # get_flow
+    description = String()
+
+    # Everything
+    error = Error()
+
+flow_request_queue = topic(
+ 'flow', kind='non-persistent', namespace='request'
+)
+flow_response_queue = topic(
+ 'flow', kind='non-persistent', namespace='response'
+)
+
+############################################################################
+
diff --git a/trustgraph-base/trustgraph/schema/graph.py b/trustgraph-base/trustgraph/schema/graph.py
index 7c304e1d..97a99fbd 100644
--- a/trustgraph-base/trustgraph/schema/graph.py
+++ b/trustgraph-base/trustgraph/schema/graph.py
@@ -18,8 +18,6 @@ class EntityContexts(Record):
metadata = Metadata()
entities = Array(EntityContext())
-entity_contexts_ingest_queue = topic('entity-contexts-load')
-
############################################################################
# Graph embeddings are embeddings associated with a graph entity
@@ -33,8 +31,6 @@ class GraphEmbeddings(Record):
metadata = Metadata()
entities = Array(EntityEmbeddings())
-graph_embeddings_store_queue = topic('graph-embeddings-store')
-
############################################################################
# Graph embeddings query
@@ -49,13 +45,6 @@ class GraphEmbeddingsResponse(Record):
error = Error()
entities = Array(Value())
-graph_embeddings_request_queue = topic(
- 'graph-embeddings', kind='non-persistent', namespace='request'
-)
-graph_embeddings_response_queue = topic(
- 'graph-embeddings', kind='non-persistent', namespace='response'
-)
-
############################################################################
# Graph triples
@@ -64,8 +53,6 @@ class Triples(Record):
metadata = Metadata()
triples = Array(Triple())
-triples_store_queue = topic('triples-store')
-
############################################################################
# Triples query
@@ -82,9 +69,3 @@ class TriplesQueryResponse(Record):
error = Error()
triples = Array(Triple())
-triples_request_queue = topic(
- 'triples', kind='non-persistent', namespace='request'
-)
-triples_response_queue = topic(
- 'triples', kind='non-persistent', namespace='response'
-)
diff --git a/trustgraph-base/trustgraph/schema/knowledge.py b/trustgraph-base/trustgraph/schema/knowledge.py
new file mode 100644
index 00000000..21217153
--- /dev/null
+++ b/trustgraph-base/trustgraph/schema/knowledge.py
@@ -0,0 +1,61 @@
+
+from pulsar.schema import Record, Bytes, String, Array, Long, Boolean
+from . types import Triple
+from . topic import topic
+from . types import Error
+from . metadata import Metadata
+from . documents import Document, TextDocument
+from . graph import Triples, GraphEmbeddings
+
+# get-kg-core
+# -> (???)
+# <- ()
+# <- (error)
+
+# delete-kg-core
+# -> (???)
+# <- ()
+# <- (error)
+
+# list-kg-cores
+# -> (user)
+# <- ()
+# <- (error)
+
+class KnowledgeRequest(Record):
+    # Each field below is annotated with the operations that use it
+    # get-kg-core, delete-kg-core, list-kg-cores, put-kg-core
+    # load-kg-core, unload-kg-core
+    operation = String()
+
+    # list-kg-cores, delete-kg-core, put-kg-core
+    user = String()
+
+    # get-kg-core, list-kg-cores, delete-kg-core, put-kg-core,
+    # load-kg-core, unload-kg-core
+    id = String()
+
+    # load-kg-core
+    flow = String()
+
+    # load-kg-core
+    collection = String()
+
+    # put-kg-core
+    triples = Triples()
+    graph_embeddings = GraphEmbeddings()
+
+class KnowledgeResponse(Record):  # Reply counterpart of KnowledgeRequest
+    error = Error()
+    ids = Array(String())
+    eos = Boolean()  # Indicates end of knowledge core stream
+    triples = Triples()
+    graph_embeddings = GraphEmbeddings()
+
+knowledge_request_queue = topic(
+ 'knowledge', kind='non-persistent', namespace='request'
+)
+knowledge_response_queue = topic(
+ 'knowledge', kind='non-persistent', namespace='response',
+)
+
diff --git a/trustgraph-base/trustgraph/schema/library.py b/trustgraph-base/trustgraph/schema/library.py
index ed52b2ad..6504fa78 100644
--- a/trustgraph-base/trustgraph/schema/library.py
+++ b/trustgraph-base/trustgraph/schema/library.py
@@ -6,46 +6,69 @@ from . types import Error
from . metadata import Metadata
from . documents import Document, TextDocument
-# add
-# -> (id, document)
+# add-document
+# -> (document_id, document_metadata, content)
# <- ()
# <- (error)
-# list
-# -> (user, collection?)
-# <- (info)
+# remove-document
+# -> (document_id)
+# <- ()
# <- (error)
-# add(Metadata, Bytes) : error?
-# copy(id, user, collection)
-# move(id, user, collection)
-# delete(id)
-# get(id) : Bytes
-# reindex(id)
-# list(user, collection) : id[]
-# info(id[]) : DocumentInfo[]
-# search([]) : id[]
+# update-document
+# -> (document_id, document_metadata)
+# <- ()
+# <- (error)
-class DocumentPackage(Record):
+# get-document-metadata
+# -> (document_id)
+# <- (document_metadata)
+# <- (error)
+
+# get-document-content
+# -> (document_id)
+# <- (content)
+# <- (error)
+
+# add-processing
+# -> (processing_id, processing_metadata)
+# <- ()
+# <- (error)
+
+# remove-processing
+# -> (processing_id)
+# <- ()
+# <- (error)
+
+# list-documents
+# -> (user, collection?)
+# <- (document_metadata[])
+# <- (error)
+
+# list-processing
+# -> (user, collection?)
+# <- (processing_metadata[])
+# <- (error)
+
+class DocumentMetadata(Record):
id = String()
- document = Bytes()
+ time = Long()
kind = String()
- user = String()
- collection = String()
title = String()
comments = String()
- time = Long()
metadata = Array(Triple())
+ user = String()
+ tags = Array(String())
-class DocumentInfo(Record):
+class ProcessingMetadata(Record):
id = String()
- kind = String()
+ document_id = String()
+ time = Long()
+ flow = String()
user = String()
collection = String()
- title = String()
- comments = String()
- time = Long()
- metadata = Array(Triple())
+ tags = Array(String())
class Criteria(Record):
key = String()
@@ -53,22 +76,51 @@ class Criteria(Record):
operator = String()
class LibrarianRequest(Record):
+
+ # add-document, remove-document, update-document, get-document-metadata,
+ # get-document-content, add-processing, remove-processing, list-documents,
+ # list-processing
operation = String()
- id = String()
- document = DocumentPackage()
+
+ # add-document, remove-document, update-document, get-document-metadata,
+ # get-document-content
+ document_id = String()
+
+ # add-processing, remove-processing
+ processing_id = String()
+
+ # add-document, update-document
+ document_metadata = DocumentMetadata()
+
+ # add-processing
+ processing_metadata = ProcessingMetadata()
+
+ # add-document
+ content = Bytes()
+
+ # list-documents, list-processing
user = String()
+
+ # list-documents?, list-processing?
collection = String()
+
+ #
criteria = Array(Criteria())
class LibrarianResponse(Record):
error = Error()
- document = DocumentPackage()
- info = Array(DocumentInfo())
+ document_metadata = DocumentMetadata()
+ content = Bytes()
+ document_metadatas = Array(DocumentMetadata())
+ processing_metadatas = Array(ProcessingMetadata())
+
+# FIXME: Is this right? Using persistence on librarian so that
+# message chunking works
librarian_request_queue = topic(
- 'librarian', kind='non-persistent', namespace='request'
+ 'librarian', kind='persistent', namespace='request'
)
librarian_response_queue = topic(
- 'librarian', kind='non-persistent', namespace='response',
+ 'librarian', kind='persistent', namespace='response',
)
diff --git a/trustgraph-base/trustgraph/schema/lookup.py b/trustgraph-base/trustgraph/schema/lookup.py
index d0a0517c..a88d188e 100644
--- a/trustgraph-base/trustgraph/schema/lookup.py
+++ b/trustgraph-base/trustgraph/schema/lookup.py
@@ -17,26 +17,5 @@ class LookupResponse(Record):
text = String()
error = Error()
-encyclopedia_lookup_request_queue = topic(
- 'encyclopedia', kind='non-persistent', namespace='request'
-)
-encyclopedia_lookup_response_queue = topic(
- 'encyclopedia', kind='non-persistent', namespace='response',
-)
-
-dbpedia_lookup_request_queue = topic(
- 'dbpedia', kind='non-persistent', namespace='request'
-)
-dbpedia_lookup_response_queue = topic(
- 'dbpedia', kind='non-persistent', namespace='response',
-)
-
-internet_search_request_queue = topic(
- 'internet-search', kind='non-persistent', namespace='request'
-)
-internet_search_response_queue = topic(
- 'internet-search', kind='non-persistent', namespace='response',
-)
-
############################################################################
diff --git a/trustgraph-base/trustgraph/schema/models.py b/trustgraph-base/trustgraph/schema/models.py
index a634e1c4..ea3b9128 100644
--- a/trustgraph-base/trustgraph/schema/models.py
+++ b/trustgraph-base/trustgraph/schema/models.py
@@ -19,13 +19,6 @@ class TextCompletionResponse(Record):
out_token = Integer()
model = String()
-text_completion_request_queue = topic(
- 'text-completion', kind='non-persistent', namespace='request'
-)
-text_completion_response_queue = topic(
- 'text-completion', kind='non-persistent', namespace='response'
-)
-
############################################################################
# Embeddings
@@ -37,9 +30,3 @@ class EmbeddingsResponse(Record):
error = Error()
vectors = Array(Array(Double()))
-embeddings_request_queue = topic(
- 'embeddings', kind='non-persistent', namespace='request'
-)
-embeddings_response_queue = topic(
- 'embeddings', kind='non-persistent', namespace='response'
-)
diff --git a/trustgraph-base/trustgraph/schema/object.py b/trustgraph-base/trustgraph/schema/object.py
index 60c2bdc3..6667fdf3 100644
--- a/trustgraph-base/trustgraph/schema/object.py
+++ b/trustgraph-base/trustgraph/schema/object.py
@@ -18,8 +18,6 @@ class ObjectEmbeddings(Record):
key_name = String()
id = String()
-object_embeddings_store_queue = topic('object-embeddings-store')
-
############################################################################
# Stores rows of information
@@ -29,5 +27,5 @@ class Rows(Record):
row_schema = RowSchema()
rows = Array(Map(String()))
-rows_store_queue = topic('rows-store')
+
diff --git a/trustgraph-base/trustgraph/schema/prompt.py b/trustgraph-base/trustgraph/schema/prompt.py
index 15eddea8..369ace53 100644
--- a/trustgraph-base/trustgraph/schema/prompt.py
+++ b/trustgraph-base/trustgraph/schema/prompt.py
@@ -55,12 +55,5 @@ class PromptResponse(Record):
# JSON encoded
object = String()
-prompt_request_queue = topic(
- 'prompt', kind='non-persistent', namespace='request'
-)
-prompt_response_queue = topic(
- 'prompt', kind='non-persistent', namespace='response'
-)
-
############################################################################
diff --git a/trustgraph-base/trustgraph/schema/retrieval.py b/trustgraph-base/trustgraph/schema/retrieval.py
index caeb8e67..1077e4f9 100644
--- a/trustgraph-base/trustgraph/schema/retrieval.py
+++ b/trustgraph-base/trustgraph/schema/retrieval.py
@@ -20,13 +20,6 @@ class GraphRagResponse(Record):
error = Error()
response = String()
-graph_rag_request_queue = topic(
- 'graph-rag', kind='non-persistent', namespace='request'
-)
-graph_rag_response_queue = topic(
- 'graph-rag', kind='non-persistent', namespace='response'
-)
-
############################################################################
# Document RAG text retrieval
@@ -41,9 +34,3 @@ class DocumentRagResponse(Record):
error = Error()
response = String()
-document_rag_request_queue = topic(
- 'doc-rag', kind='non-persistent', namespace='request'
-)
-document_rag_response_queue = topic(
- 'doc-rag', kind='non-persistent', namespace='response'
-)
diff --git a/trustgraph-bedrock/setup.py b/trustgraph-bedrock/setup.py
index 8db4520b..a0f7d544 100644
--- a/trustgraph-bedrock/setup.py
+++ b/trustgraph-bedrock/setup.py
@@ -34,7 +34,7 @@ setuptools.setup(
python_requires='>=3.8',
download_url = "https://github.com/trustgraph-ai/trustgraph/archive/refs/tags/v" + version + ".tar.gz",
install_requires=[
- "trustgraph-base>=0.21,<0.22",
+ "trustgraph-base>=0.23,<0.24",
"pulsar-client",
"prometheus-client",
"boto3",
diff --git a/trustgraph-bedrock/trustgraph/model/text_completion/bedrock/llm.py b/trustgraph-bedrock/trustgraph/model/text_completion/bedrock/llm.py
index 9b8818a2..156030d0 100755
--- a/trustgraph-bedrock/trustgraph/model/text_completion/bedrock/llm.py
+++ b/trustgraph-bedrock/trustgraph/model/text_completion/bedrock/llm.py
@@ -6,22 +6,14 @@ Input is prompt, output is response. Mistral is default.
import boto3
import json
-from prometheus_client import Histogram
import os
import enum
-from .... schema import TextCompletionRequest, TextCompletionResponse, Error
-from .... schema import text_completion_request_queue
-from .... schema import text_completion_response_queue
-from .... log_level import LogLevel
-from .... base import ConsumerProducer
from .... exceptions import TooManyRequests
+from .... base import LlmService, LlmResult
-module = ".".join(__name__.split(".")[1:-1])
+default_ident = "text-completion"
-default_input_queue = text_completion_request_queue
-default_output_queue = text_completion_response_queue
-default_subscriber = module
default_model = 'mistral.mistral-large-2407-v1:0'
default_temperature = 0.0
default_max_output = 2048
@@ -149,16 +141,12 @@ class Cohere(ModelHandler):
Default=Mistral
-class Processor(ConsumerProducer):
+class Processor(LlmService):
def __init__(self, **params):
print(params)
- input_queue = params.get("input_queue", default_input_queue)
- output_queue = params.get("output_queue", default_output_queue)
- subscriber = params.get("subscriber", default_subscriber)
-
model = params.get("model", default_model)
temperature = params.get("temperature", default_temperature)
max_output = params.get("max_output", default_max_output)
@@ -185,30 +173,12 @@ class Processor(ConsumerProducer):
super(Processor, self).__init__(
**params | {
- "input_queue": input_queue,
- "output_queue": output_queue,
- "subscriber": subscriber,
- "input_schema": TextCompletionRequest,
- "output_schema": TextCompletionResponse,
"model": model,
"temperature": temperature,
"max_output": max_output,
}
)
- if not hasattr(__class__, "text_completion_metric"):
- __class__.text_completion_metric = Histogram(
- 'text_completion_duration',
- 'Text completion duration (seconds)',
- buckets=[
- 0.25, 0.5, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0,
- 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0,
- 30.0, 35.0, 40.0, 45.0, 50.0, 60.0, 80.0, 100.0,
- 120.0
- ]
- )
-
self.model = model
self.temperature = temperature
self.max_output = max_output
@@ -257,30 +227,21 @@ class Processor(ConsumerProducer):
return Default
- async def handle(self, msg):
-
- v = msg.value()
-
- # Sender-produced ID
-
- id = msg.properties()["id"]
-
- print(f"Handling prompt {id}...", flush=True)
+ async def generate_content(self, system, prompt):
try:
- promptbody = self.variant.encode_request(v.system, v.prompt)
+ promptbody = self.variant.encode_request(system, prompt)
accept = 'application/json'
contentType = 'application/json'
- with __class__.text_completion_metric.time():
- response = self.bedrock.invoke_model(
- body=promptbody,
- modelId=self.model,
- accept=accept,
- contentType=contentType
- )
+ response = self.bedrock.invoke_model(
+ body=promptbody,
+ modelId=self.model,
+ accept=accept,
+ contentType=contentType
+ )
# Response structure decode
outputtext = self.variant.decode_response(response)
@@ -293,18 +254,14 @@ class Processor(ConsumerProducer):
print(f"Input Tokens: {inputtokens}", flush=True)
print(f"Output Tokens: {outputtokens}", flush=True)
- print("Send response...", flush=True)
- r = TextCompletionResponse(
- error=None,
- response=outputtext,
- in_token=inputtokens,
- out_token=outputtokens,
- model=str(self.model),
+ resp = LlmResult(
+ text = outputtext,
+ in_token = inputtokens,
+ out_token = outputtokens,
+ model = self.model
)
- await self.send(r, properties={"id": id})
-
- print("Done.", flush=True)
+ return resp
except self.bedrock.exceptions.ThrottlingException as e:
@@ -319,31 +276,12 @@ class Processor(ConsumerProducer):
print(type(e))
print(f"Exception: {e}")
-
- print("Send error response...", flush=True)
-
- r = TextCompletionResponse(
- error=Error(
- type = "llm-error",
- message = str(e),
- ),
- response=None,
- in_token=None,
- out_token=None,
- model=None,
- )
-
- await self.send(r, properties={"id": id})
-
- self.consumer.acknowledge(msg)
+ raise e
@staticmethod
def add_args(parser):
- ConsumerProducer.add_args(
- parser, default_input_queue, default_subscriber,
- default_output_queue,
- )
+ LlmService.add_args(parser)
parser.add_argument(
'-m', '--model',
@@ -391,5 +329,4 @@ class Processor(ConsumerProducer):
def run():
- Processor.launch(module, __doc__)
-
+ Processor.launch(default_ident, __doc__)
diff --git a/trustgraph-cli/scripts/tg-add-library-document b/trustgraph-cli/scripts/tg-add-library-document
new file mode 100755
index 00000000..16e8712b
--- /dev/null
+++ b/trustgraph-cli/scripts/tg-add-library-document
@@ -0,0 +1,206 @@
+#!/usr/bin/env python3
+
+"""
+Loads a document into the library
+"""
+
+import hashlib
+import argparse
+import os
+import time
+import uuid
+
+from trustgraph.api import Api
+from trustgraph.knowledge import hash, to_uri
+from trustgraph.knowledge import PREF_PUBEV, PREF_DOC, PREF_ORG
+from trustgraph.knowledge import Organization, PublicationEvent
+from trustgraph.knowledge import DigitalDocument
+
+default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/')
+default_user = 'trustgraph'
+
+class Loader:
+    """Adds local files to the library through the TrustGraph library API."""
+    def __init__(
+            self, id, url, user, metadata, title, comments, kind, tags
+    ):
+        """Keep the settings applied to every file; tags is comma-separated or None."""
+        self.api = Api(url).library()
+
+        self.user = user
+        self.metadata = metadata
+        self.title = title
+        self.comments = comments
+        self.kind = kind
+        self.identifier = id
+
+        # An unset or empty tag string means no tags
+        self.tags = tags.split(",") if tags else []
+
+    def load(self, files):
+        """Load each file in turn; the first failure propagates and ends the run."""
+        for file in files:
+            self.load_file(file)
+
+    def load_file(self, file):
+        """Read one file and add it to the library, re-raising any failure.
+
+        The document ID is the configured identifier if set, otherwise a URI
+        derived from a hash of the file content.
+        """
+        try:
+
+            # The context manager closes the file even if the read fails
+            with open(file, "rb") as f:
+                data = f.read()
+
+            if self.identifier:
+                id = self.identifier
+            else:
+                id = to_uri(PREF_DOC, hash(data))
+
+            self.metadata.id = id
+
+            self.api.add_document(
+                document=data, id=id, metadata=self.metadata,
+                user=self.user, kind=self.kind, title=self.title,
+                comments=self.comments, tags=self.tags
+            )
+
+            print(f"{file}: Loaded successfully.")
+
+        except Exception as e:
+            print(f"{file}: Failed: {str(e)}", flush=True)
+            raise
+
+def main():
+    """Parse the command line, build the document metadata and load the files."""
+    parser = argparse.ArgumentParser(
+        prog='tg-add-library-document',
+        description=__doc__,
+    )
+
+    parser.add_argument(
+        '-u', '--url',
+        default=default_url,
+        help=f'API URL (default: {default_url})',
+    )
+
+    parser.add_argument(
+        '-U', '--user',
+        default=default_user,
+        help=f'User ID (default: {default_user})'
+    )
+
+    parser.add_argument(
+        '--name', help='Document name'
+    )
+
+    parser.add_argument(
+        '--description', help='Document description'
+    )
+
+    parser.add_argument(
+        '--copyright-notice', help='Copyright notice'
+    )
+
+    parser.add_argument(
+        '--copyright-holder', help='Copyright holder'
+    )
+
+    parser.add_argument(
+        '--copyright-year', help='Copyright year'
+    )
+
+    parser.add_argument(
+        '--license', help='Copyright license'
+    )
+
+    parser.add_argument(
+        '--publication-organization', help='Publication organization'
+    )
+
+    parser.add_argument(
+        '--publication-description', help='Publication description'
+    )
+
+    parser.add_argument(
+        '--publication-date', help='Publication date'
+    )
+
+    parser.add_argument(
+        '--document-url', help='Document URL'
+    )
+
+    parser.add_argument(
+        '--keyword', nargs='+', help='Keyword'
+    )
+
+    parser.add_argument(
+        '--identifier', '--id', help='Document ID'
+    )
+
+    parser.add_argument(
+        '-k', '--kind',
+        required=True,
+        help='Document MIME type'
+    )
+
+    parser.add_argument(
+        '--tags',
+        help='Tags, comma separated'
+    )
+
+    parser.add_argument(
+        'files', nargs='+',
+        help='File to load'
+    )
+
+    args = parser.parse_args()
+
+    try:
+        # Descriptive metadata supplied on the command line
+        document = DigitalDocument(
+            args.identifier,
+            name=args.name,
+            description=args.description,
+            copyright_notice=args.copyright_notice,
+            copyright_holder=args.copyright_holder,
+            copyright_year=args.copyright_year,
+            license=args.license,
+            url=args.document_url,
+            keywords=args.keyword,
+        )
+
+        if args.publication_organization:
+            org = Organization(
+                id=to_uri(PREF_ORG, hash(args.publication_organization)),
+                name=args.publication_organization,
+            )
+            document.publication = PublicationEvent(
+                id = to_uri(PREF_PUBEV, str(uuid.uuid4())),
+                organization=org,
+                description=args.publication_description,
+                start_date=args.publication_date,
+                end_date=args.publication_date,
+            )
+
+        p = Loader(
+            id=args.identifier,
+            url=args.url,
+            user=args.user,
+            metadata=document,
+            title=args.name,
+            comments=args.description,
+            kind=args.kind,
+            tags=args.tags,
+        )
+
+        p.load(args.files)
+
+    except Exception as e:
+        # Report the failure rather than showing a traceback
+        print("Exception:", e, flush=True)
+
+main()
+
diff --git a/trustgraph-cli/scripts/tg-delete-flow-class b/trustgraph-cli/scripts/tg-delete-flow-class
new file mode 100755
index 00000000..8ca7adb5
--- /dev/null
+++ b/trustgraph-cli/scripts/tg-delete-flow-class
@@ -0,0 +1,53 @@
+#!/usr/bin/env python3
+
+"""
+Deletes a flow class
+"""
+
+import argparse
+import os
+import tabulate
+from trustgraph.api import Api
+import json
+
+default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/')
+
+def delete_flow_class(url, class_name):
+    """Delete the flow class class_name through the flow API at url."""
+    api = Api(url).flow()
+
+    api.delete_class(class_name)
+
+def main():
+    """Parse the command line and delete the requested flow class."""
+    parser = argparse.ArgumentParser(
+        prog='tg-delete-flow-class',
+        description=__doc__,
+    )
+
+    parser.add_argument(
+        '-u', '--api-url',
+        default=default_url,
+        help=f'API URL (default: {default_url})',
+    )
+
+    # Required, as in tg-get-flow-class: without it None would be sent to the API
+    parser.add_argument(
+        '-n', '--class-name',
+        required=True,
+        help='Flow class name',
+    )
+
+    args = parser.parse_args()
+
+    try:
+        delete_flow_class(
+            url=args.api_url,
+            class_name=args.class_name,
+        )
+    except Exception as e:
+        # Report the failure rather than showing a traceback
+        print("Exception:", e, flush=True)
+
+main()
+
diff --git a/trustgraph-cli/scripts/tg-delete-kg-core b/trustgraph-cli/scripts/tg-delete-kg-core
new file mode 100755
index 00000000..c9b635aa
--- /dev/null
+++ b/trustgraph-cli/scripts/tg-delete-kg-core
@@ -0,0 +1,61 @@
+#!/usr/bin/env python3
+
+"""
+Deletes a knowledge core
+"""
+
+import argparse
+import os
+import tabulate
+from trustgraph.api import Api
+import json
+
+default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/')
+
+def delete_kg_core(url, user, id):
+    """Delete knowledge core id for user through the knowledge API at url."""
+    api = Api(url).knowledge()
+
+    api.delete_kg_core(user = user, id = id)
+
+def main():
+    """Parse the command line and delete the requested knowledge core."""
+    parser = argparse.ArgumentParser(
+        prog='tg-delete-kg-core',
+        description=__doc__,
+    )
+
+    parser.add_argument(
+        '-u', '--api-url',
+        default=default_url,
+        help=f'API URL (default: {default_url})',
+    )
+
+    parser.add_argument(
+        '-U', '--user',
+        default="trustgraph",
+        help='User ID (default: trustgraph)',
+    )
+
+    parser.add_argument(
+        '--id', '--identifier',
+        required=True,
+        help='Knowledge core ID',
+    )
+
+    args = parser.parse_args()
+
+    try:
+
+        delete_kg_core(
+            url=args.api_url,
+            user=args.user,
+            id=args.id,
+        )
+
+    except Exception as e:
+        # Report the failure rather than showing a traceback
+        print("Exception:", e, flush=True)
+
+main()
+
diff --git a/trustgraph-cli/scripts/tg-get-flow-class b/trustgraph-cli/scripts/tg-get-flow-class
new file mode 100755
index 00000000..abe88cba
--- /dev/null
+++ b/trustgraph-cli/scripts/tg-get-flow-class
@@ -0,0 +1,56 @@
+#!/usr/bin/env python3
+
+"""
+Outputs a flow class definition in JSON format.
+"""
+
+import argparse
+import os
+import tabulate
+from trustgraph.api import Api
+import json
+
+default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/')
+
+def get_flow_class(url, class_name):
+    """Fetch the flow class class_name and print its definition as indented JSON."""
+    definition = Api(url).flow().get_class(class_name)
+
+    rendered = json.dumps(definition, indent=4)
+
+    print(rendered)
+
+def main():
+    """Command-line entry point: print the requested flow class definition."""
+    cli = argparse.ArgumentParser(
+        prog='tg-get-flow-class',
+        description=__doc__,
+    )
+
+    # Where the API lives
+    cli.add_argument(
+        '-u', '--api-url',
+        help=f'API URL (default: {default_url})',
+        default=default_url,
+    )
+
+    # Which flow class to print
+    cli.add_argument(
+        '-n', '--class-name',
+        help='Flow class name',
+        required=True,
+    )
+
+    options = cli.parse_args()
+
+    try:
+        get_flow_class(
+            url=options.api_url,
+            class_name=options.class_name,
+        )
+    except Exception as err:
+        # Report the failure rather than showing a traceback
+        print("Exception:", err, flush=True)
+
+main()
+
diff --git a/trustgraph-cli/scripts/tg-get-kg-core b/trustgraph-cli/scripts/tg-get-kg-core
new file mode 100755
index 00000000..6eb52bde
--- /dev/null
+++ b/trustgraph-cli/scripts/tg-get-kg-core
@@ -0,0 +1,161 @@
+#!/usr/bin/env python3
+
+"""
+Uses the knowledge service to fetch a knowledge core which is saved
+to a local file in msgpack format.
+"""
+
+import argparse
+import os
+import textwrap
+import uuid
+import asyncio
+import json
+from websockets.asyncio.client import connect
+import msgpack
+
+default_url = os.getenv("TRUSTGRAPH_URL", 'ws://localhost:8088/')
+default_user = 'trustgraph'
+
+def write_triple(f, data):
+    """Write one triples message to f as a ("t", {...}) msgpack record."""
+    meta = data["metadata"]
+    header = {
+        "i": meta["id"],
+        "m": meta["metadata"],
+        "u": meta["user"],
+        "c": meta["collection"],
+    }
+    # Short keys: m = metadata header, t = triples
+    record = ("t", {"m": header, "t": data["triples"]})
+    packed = msgpack.packb(record, use_bin_type=True)
+
+    f.write(packed)
+
+def write_ge(f, data):
+    """Write one graph-embeddings message to f as a ("ge", {...}) msgpack record."""
+    meta = data["metadata"]
+    header = {
+        "i": meta["id"],
+        "m": meta["metadata"],
+        "u": meta["user"],
+        "c": meta["collection"],
+    }
+
+    # Each entity is reduced to its short-key form: e = entity, v = vectors
+    entities = []
+    for ent in data["entities"]:
+        entities.append({"e": ent["entity"], "v": ent["vectors"]})
+
+    # Short keys: m = metadata header, e = entity list
+    record = ("ge", {"m": header, "e": entities})
+    packed = msgpack.packb(record, use_bin_type=True)
+
+    f.write(packed)
+
+async def fetch(url, user, id, output):
+    """Stream knowledge core id for user from the socket API into file output.
+
+    Raises RuntimeError if the service reports an error or omits the response.
+    """
+    if not url.endswith("/"):
+        url += "/"
+    url = url + "api/v1/socket"
+
+    mid = str(uuid.uuid4())
+
+    async with connect(url) as ws:
+        req = json.dumps({
+            "id": mid,
+            "service": "knowledge",
+            "request": {
+                "operation": "get-kg-core",
+                "user": user,
+                "id": id,
+            }
+        })
+        await ws.send(req)
+
+        # Counts of triples / graph-embeddings messages written
+        ge = 0
+        t = 0
+
+        with open(output, "wb") as f:
+            while True:
+                obj = json.loads(await ws.recv())
+
+                # Error reported at the top level of the message
+                if "error" in obj:
+                    raise RuntimeError(obj["error"])
+                if "response" not in obj:
+                    raise RuntimeError("No response?")
+                response = obj["response"]
+
+                # Error reported inside the response: read it from there
+                if "error" in response:
+                    raise RuntimeError(response["error"])
+
+                if response.get("eos"):
+                    break
+
+                if "triples" in response:
+                    t += 1
+                    write_triple(f, response["triples"])
+
+                if "graph-embeddings" in response:
+                    ge += 1
+                    write_ge(f, response["graph-embeddings"])
+
+        print(f"Got: {t} triple, {ge} GE messages.")
+
+        await ws.close()
+
+def main():
+    """Command-line entry point: fetch a knowledge core into a local file."""
+    cli = argparse.ArgumentParser(
+        prog='tg-get-kg-core',
+        description=__doc__,
+    )
+
+    # Connection and identity
+    cli.add_argument(
+        '-u', '--url',
+        help=f'API URL (default: {default_url})',
+        default=default_url,
+    )
+    cli.add_argument(
+        '-U', '--user',
+        help=f'User ID (default: {default_user})',
+        default=default_user,
+    )
+
+    # What to fetch and where to write it
+    cli.add_argument(
+        '--id', '--identifier',
+        help='Knowledge core ID',
+        required=True,
+    )
+    cli.add_argument(
+        '-o', '--output',
+        help='Output file',
+        required=True,
+    )
+
+    options = cli.parse_args()
+
+    try:
+        job = fetch(
+            url = options.url,
+            user = options.user,
+            id = options.id,
+            output = options.output,
+        )
+        asyncio.run(job)
+
+    except Exception as err:
+
+        # Report the failure rather than showing a traceback
+        print("Exception:", err, flush=True)
+
+main()
+
diff --git a/trustgraph-cli/scripts/tg-graph-to-turtle b/trustgraph-cli/scripts/tg-graph-to-turtle
index fc17ddd0..a7607986 100755
--- a/trustgraph-cli/scripts/tg-graph-to-turtle
+++ b/trustgraph-cli/scripts/tg-graph-to-turtle
@@ -17,14 +17,14 @@ default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/')
default_user = 'trustgraph'
default_collection = 'default'
-def show_graph(url, user, collection):
+def show_graph(url, flow_id, user, collection):
- api = Api(url)
+ api = Api(url).flow().id(flow_id)
rows = api.triples_query(
s=None, p=None, o=None,
+ user=user, collection=collection,
limit=10_000)
-# user=user, collection=collection,
g = rdflib.Graph()
@@ -69,6 +69,12 @@ def main():
help=f'API URL (default: {default_url})',
)
+ parser.add_argument(
+ '-f', '--flow-id',
+ default="0000",
+ help=f'Flow ID (default: 0000)'
+ )
+
parser.add_argument(
'-U', '--user',
default=default_user,
@@ -86,9 +92,10 @@ def main():
try:
show_graph(
- url=args.api_url,
- user=args.user,
- collection=args.collection
+ url = args.api_url,
+ flow_id = args.flow_id,
+ user = args.user,
+ collection = args.collection,
)
except Exception as e:
diff --git a/trustgraph-cli/scripts/tg-init-pulsar b/trustgraph-cli/scripts/tg-init-pulsar
deleted file mode 100755
index 07fd31eb..00000000
--- a/trustgraph-cli/scripts/tg-init-pulsar
+++ /dev/null
@@ -1,119 +0,0 @@
-#!/usr/bin/env python3
-
-"""
-Initialises Pulsar with Trustgraph tenant / namespaces & policy.
-"""
-
-import requests
-import time
-import argparse
-
-default_pulsar_admin_url = "http://pulsar:8080"
-
-def get_clusters(url):
-
- print("Get clusters...", flush=True)
-
- resp = requests.get(f"{url}/admin/v2/clusters")
-
- if resp.status_code != 200: raise RuntimeError("Could not fetch clusters")
-
- return resp.json()
-
-def ensure_tenant(url, tenant, clusters):
-
- resp = requests.get(f"{url}/admin/v2/tenants/{tenant}")
-
- if resp.status_code == 200:
- print(f"Tenant {tenant} already exists.", flush=True)
- return
-
- resp = requests.put(
- f"{url}/admin/v2/tenants/{tenant}",
- json={
- "adminRoles": [],
- "allowedClusters": clusters,
- }
- )
-
- if resp.status_code != 204:
- print(resp.text, flush=True)
- raise RuntimeError("Tenant creation failed.")
-
- print(f"Tenant {tenant} created.", flush=True)
-
-def ensure_namespace(url, tenant, namespace, config):
-
- resp = requests.get(f"{url}/admin/v2/namespaces/{tenant}/{namespace}")
-
- if resp.status_code == 200:
- print(f"Namespace {tenant}/{namespace} already exists.", flush=True)
- return
-
- resp = requests.put(
- f"{url}/admin/v2/namespaces/{tenant}/{namespace}",
- json=config,
- )
-
- if resp.status_code != 204:
- print(resp.status_code, flush=True)
- print(resp.text, flush=True)
- raise RuntimeError(f"Namespace {tenant}/{namespace} creation failed.")
-
- print(f"Namespace {tenant}/{namespace} created.", flush=True)
-
-def init(url, tenant="tg"):
-
- clusters = get_clusters(url)
-
- ensure_tenant(url, tenant, clusters)
-
- ensure_namespace(url, tenant, "flow", {})
-
- ensure_namespace(url, tenant, "request", {})
-
- ensure_namespace(url, tenant, "response", {
- "retention_policies": {
- "retentionSizeInMB": -1,
- "retentionTimeInMinutes": 3,
- }
- })
-
-def main():
-
- parser = argparse.ArgumentParser(
- prog='tg-init-pulsar',
- description=__doc__,
- )
-
- parser.add_argument(
- '-p', '--pulsar-admin-url',
- default=default_pulsar_admin_url,
- help=f'Pulsar admin URL (default: {default_pulsar_admin_url})',
- )
-
- args = parser.parse_args()
-
- while True:
-
- try:
-
- print(flush=True)
- print(
- f"Initialising with Pulsar {args.pulsar_admin_url}...",
- flush=True
- )
- init(args.pulsar_admin_url, "tg")
- print("Initialisation complete.", flush=True)
- break
-
- except Exception as e:
-
- print("Exception:", e, flush=True)
-
- print("Sleeping...", flush=True)
- time.sleep(2)
- print("Will retry...", flush=True)
-
-main()
-
diff --git a/trustgraph-cli/scripts/tg-init-trustgraph b/trustgraph-cli/scripts/tg-init-trustgraph
new file mode 100755
index 00000000..2265437e
--- /dev/null
+++ b/trustgraph-cli/scripts/tg-init-trustgraph
@@ -0,0 +1,221 @@
+#!/usr/bin/env python3
+
+"""
+Initialises Pulsar with the TrustGraph tenant, namespaces & policy, and optionally loads initial configuration.
+"""
+
+import requests
+import time
+import argparse
+import json
+
+from trustgraph.clients.config_client import ConfigClient
+
+default_pulsar_admin_url = "http://pulsar:8080"
+default_pulsar_host = "pulsar://pulsar:6650"
+subscriber = "tg-init-pulsar"
+
+def get_clusters(url):
+    """Return the cluster list from the Pulsar admin API, or raise RuntimeError."""
+    print("Get clusters...", flush=True)
+
+    reply = requests.get(f"{url}/admin/v2/clusters")
+
+    if reply.status_code == 200:
+        return reply.json()
+    raise RuntimeError("Could not fetch clusters")
+
+def ensure_tenant(url, tenant, clusters):
+    """Create the tenant, allowed on clusters, unless it already exists."""
+    tenant_url = f"{url}/admin/v2/tenants/{tenant}"
+
+    # A 200 on the lookup means there is nothing to create
+    if requests.get(tenant_url).status_code == 200:
+        print(f"Tenant {tenant} already exists.", flush=True)
+        return
+
+    spec = {
+        "adminRoles": [],
+        "allowedClusters": clusters,
+    }
+    created = requests.put(tenant_url, json=spec)
+
+    # Anything other than 204 is treated as a failed creation
+    if created.status_code != 204:
+        print(created.text, flush=True)
+        raise RuntimeError("Tenant creation failed.")
+
+    print(f"Tenant {tenant} created.", flush=True)
+
+def ensure_namespace(url, tenant, namespace, config):
+    """Create namespace tenant/namespace with policy config unless it already exists."""
+    name = f"{tenant}/{namespace}"
+    namespace_url = f"{url}/admin/v2/namespaces/{name}"
+
+    # A 200 on the lookup means there is nothing to create
+    if requests.get(namespace_url).status_code == 200:
+        print(f"Namespace {name} already exists.", flush=True)
+        return
+
+    created = requests.put(namespace_url, json=config)
+
+    # Anything other than 204 is treated as a failed creation
+    if created.status_code != 204:
+        print(created.status_code, flush=True)
+        print(created.text, flush=True)
+        raise RuntimeError(f"Namespace {name} creation failed.")
+
+    print(f"Namespace {name} created.", flush=True)
+
+def ensure_config(config, pulsar_host, pulsar_api_key):
+    """Load config into the config service if the service is still at version 0."""
+    cli = ConfigClient(
+        subscriber=subscriber,
+        pulsar_host=pulsar_host,
+        pulsar_api_key=pulsar_api_key,
+    )
+
+    # Keep going until the version has been read and any update has succeeded
+    while True:
+
+        try:
+            print("Get current config...", flush=True)
+            current, version = cli.config(timeout=5)
+        except Exception as err:
+            print("Exception:", err, flush=True)
+            time.sleep(2)
+            print("Retrying...", flush=True)
+            continue
+
+        print("Current config version is", version, flush=True)
+
+        # Any other version means a config has been loaded already
+        if version != 0:
+            print("Already updated, not updating config. Done.", flush=True)
+            return
+
+        print("Config is version 0, updating...", flush=True)
+
+        # One entry per type/key pair, with the value JSON-encoded
+        batch = []
+        for kind in config:
+            entries = config[kind]
+            for key in entries:
+                print(f"Adding {kind}/{key} to update.", flush=True)
+                batch.append({
+                    "type": kind,
+                    "key": key,
+                    "value": json.dumps(entries[key]),
+                })
+
+        try:
+            cli.put(batch, timeout=10)
+            print("Update succeeded.", flush=True)
+            break
+        except Exception as err:
+            print("Exception:", err, flush=True)
+            time.sleep(2)
+            print("Retrying...", flush=True)
+            continue
+
+def init(pulsar_admin_url, pulsar_host, pulsar_api_key, config, tenant):
+    """Ensure the tenant and namespaces exist, then load config (a JSON string) if given."""
+    clusters = get_clusters(pulsar_admin_url)
+    ensure_tenant(pulsar_admin_url, tenant, clusters)
+    # Created in this order, each namespace with its own policy document
+    namespaces = [
+        ("flow", {}),
+        ("request", {}),
+        ("response", {
+            "retention_policies": {
+                "retentionSizeInMB": -1,
+                "retentionTimeInMinutes": 3,
+                "subscriptionExpirationTimeMinutes": 30,
+            }
+        }),
+        ("config", {
+            "retention_policies": {
+                "retentionSizeInMB": 10,
+                "retentionTimeInMinutes": -1,
+                "subscriptionExpirationTimeMinutes": 5,
+            }
+        }),
+    ]
+    for namespace, policy in namespaces:
+        ensure_namespace(pulsar_admin_url, tenant, namespace, policy)
+
+    # Nothing more to do when no initial config was supplied
+    if config is None:
+        print("No config to update.", flush=True)
+        return
+
+    try:
+        print("Decoding config...", flush=True)
+        dec = json.loads(config)
+        print("Decoded.", flush=True)
+    except Exception as err:
+        print("Exception:", err, flush=True)
+        raise err
+
+    ensure_config(dec, pulsar_host, pulsar_api_key)
+
+def main():
+    """Command-line entry point: retry initialisation until it succeeds."""
+    cli = argparse.ArgumentParser(
+        prog='tg-init-trustgraph',
+        description=__doc__,
+    )
+
+    cli.add_argument(
+        '-p', '--pulsar-admin-url',
+        help=f'Pulsar admin URL (default: {default_pulsar_admin_url})',
+        default=default_pulsar_admin_url,
+    )
+
+    cli.add_argument(
+        '--pulsar-host',
+        help=f'Pulsar host (default: {default_pulsar_host})',
+        default=default_pulsar_host,
+    )
+
+    cli.add_argument(
+        '--pulsar-api-key',
+        help='Pulsar API key',
+    )
+
+    cli.add_argument(
+        '-c', '--config',
+        help='Initial configuration to load',
+    )
+
+    cli.add_argument(
+        '-t', '--tenant',
+        help='Tenant (default: tg)',
+        default="tg",
+    )
+
+    # The parsed option names match init()'s parameters, so they are passed
+    # through to it as keyword arguments
+    args = cli.parse_args()
+
+    while True:
+
+        try:
+
+            print(flush=True)
+            print(
+                f"Initialising with Pulsar {args.pulsar_admin_url}...",
+                flush=True
+            )
+            init(**vars(args))
+            print("Initialisation complete.", flush=True)
+            break
+
+        except Exception as err:
+            print("Exception:", err, flush=True)
+            print("Sleeping...", flush=True)
+            time.sleep(2)
+            print("Will retry...", flush=True)
+
+main()
+
diff --git a/trustgraph-cli/scripts/tg-invoke-agent b/trustgraph-cli/scripts/tg-invoke-agent
index 5e213447..8a148d00 100755
--- a/trustgraph-cli/scripts/tg-invoke-agent
+++ b/trustgraph-cli/scripts/tg-invoke-agent
@@ -1,7 +1,7 @@
#!/usr/bin/env python3
"""
-Uses the GraphRAG service to answer a question
+Uses the agent service to answer a question
"""
import argparse
@@ -30,7 +30,7 @@ def output(text, prefix="> ", width=78):
print(out)
async def question(
- url, question, user, collection,
+ url, question, flow_id, user, collection,
plan=None, state=None, verbose=False
):
@@ -60,6 +60,7 @@ async def question(
req = json.dumps({
"id": mid,
"service": "agent",
+ "flow": flow_id,
"request": {
"question": question,
}
@@ -74,6 +75,9 @@ async def question(
obj = json.loads(msg)
+ if "error" in obj:
+ raise RuntimeError(obj["error"])
+
if obj["id"] != mid:
print("Ignore message")
continue
@@ -104,6 +108,12 @@ def main():
help=f'API URL (default: {default_url})',
)
+ parser.add_argument(
+ '-f', '--flow-id',
+ default="0000",
+ help=f'Flow ID (default: 0000)'
+ )
+
parser.add_argument(
'-q', '--question',
required=True,
@@ -137,12 +147,6 @@ def main():
action="store_true",
help=f'Output thinking/observations'
)
-
- # parser.add_argument(
- # '--pulsar-api-key',
- # default=default_pulsar_api_key,
- # help=f'Pulsar API key',
- # )
args = parser.parse_args()
@@ -150,13 +154,14 @@ def main():
asyncio.run(
question(
- url=args.url,
- question=args.question,
- user=args.user,
- collection=args.collection,
- plan=args.plan,
- state=args.state,
- verbose=args.verbose,
+ url = args.url,
+ flow_id = args.flow_id,
+ question = args.question,
+ user = args.user,
+ collection = args.collection,
+ plan = args.plan,
+ state = args.state,
+ verbose = args.verbose,
)
)
diff --git a/trustgraph-cli/scripts/tg-invoke-document-rag b/trustgraph-cli/scripts/tg-invoke-document-rag
index 759d4200..9f70b1dc 100755
--- a/trustgraph-cli/scripts/tg-invoke-document-rag
+++ b/trustgraph-cli/scripts/tg-invoke-document-rag
@@ -1,7 +1,7 @@
#!/usr/bin/env python3
"""
-Uses the GraphRAG service to answer a question
+Uses the DocumentRAG service to answer a question
"""
import argparse
@@ -13,11 +13,11 @@ default_user = 'trustgraph'
default_collection = 'default'
default_doc_limit = 10
-def question(url, question, user, collection, doc_limit):
+def question(url, flow_id, question, user, collection, doc_limit):
- rag = Api(url)
+ api = Api(url).flow().id(flow_id)
- resp = rag.document_rag(
+ resp = api.document_rag(
question=question, user=user, collection=collection,
doc_limit=doc_limit,
)
@@ -37,11 +37,11 @@ def main():
help=f'API URL (default: {default_url})',
)
- # parser.add_argument(
- # '--pulsar-api-key',
- # default=default_pulsar_api_key,
- # help=f'Pulsar API key',
- # )
+ parser.add_argument(
+ '-f', '--flow-id',
+ default="0000",
+ help=f'Flow ID (default: 0000)'
+ )
parser.add_argument(
'-q', '--question',
@@ -73,6 +73,7 @@ def main():
question(
url=args.url,
+ flow_id = args.flow_id,
question=args.question,
user=args.user,
collection=args.collection,
diff --git a/trustgraph-cli/scripts/tg-invoke-graph-rag b/trustgraph-cli/scripts/tg-invoke-graph-rag
index 5bbe5f59..85652d74 100755
--- a/trustgraph-cli/scripts/tg-invoke-graph-rag
+++ b/trustgraph-cli/scripts/tg-invoke-graph-rag
@@ -17,13 +17,13 @@ default_max_subgraph_size = 150
default_max_path_length = 2
def question(
- url, question, user, collection, entity_limit, triple_limit,
+ url, flow_id, question, user, collection, entity_limit, triple_limit,
max_subgraph_size, max_path_length
):
- rag = Api(url)
+ api = Api(url).flow().id(flow_id)
- resp = rag.graph_rag(
+ resp = api.graph_rag(
question=question, user=user, collection=collection,
entity_limit=entity_limit, triple_limit=triple_limit,
max_subgraph_size=max_subgraph_size,
@@ -45,6 +45,12 @@ def main():
help=f'API URL (default: {default_url})',
)
+ parser.add_argument(
+ '-f', '--flow-id',
+ default="0000",
+ help=f'Flow ID (default: 0000)'
+ )
+
parser.add_argument(
'-q', '--question',
required=True,
@@ -93,6 +99,7 @@ def main():
question(
url=args.url,
+ flow_id = args.flow_id,
question=args.question,
user=args.user,
collection=args.collection,
diff --git a/trustgraph-cli/scripts/tg-invoke-llm b/trustgraph-cli/scripts/tg-invoke-llm
index eb469b6e..86c8d60f 100755
--- a/trustgraph-cli/scripts/tg-invoke-llm
+++ b/trustgraph-cli/scripts/tg-invoke-llm
@@ -12,9 +12,9 @@ from trustgraph.api import Api
default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/')
-def query(url, system, prompt):
+def query(url, flow_id, system, prompt):
- api = Api(url)
+ api = Api(url).flow().id(flow_id)
resp = api.text_completion(system=system, prompt=prompt)
@@ -44,12 +44,12 @@ def main():
nargs=1,
help='LLM prompt e.g. What is 2 + 2?',
)
-
- # parser.add_argument(
- # '--pulsar-api-key',
- # default=default_pulsar_api_key,
- # help=f'Pulsar API key',
- # )
+
+ parser.add_argument(
+ '-f', '--flow-id',
+ default="0000",
+ help=f'Flow ID (default: 0000)'
+ )
args = parser.parse_args()
@@ -57,6 +57,7 @@ def main():
query(
url=args.url,
+ flow_id = args.flow_id,
system=args.system[0],
prompt=args.prompt[0],
)
diff --git a/trustgraph-cli/scripts/tg-invoke-prompt b/trustgraph-cli/scripts/tg-invoke-prompt
index 426fe1ee..49697090 100755
--- a/trustgraph-cli/scripts/tg-invoke-prompt
+++ b/trustgraph-cli/scripts/tg-invoke-prompt
@@ -16,9 +16,9 @@ from trustgraph.api import Api
default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/')
-def query(url, template_id, variables):
+def query(url, flow_id, template_id, variables):
- api = Api(url)
+ api = Api(url).flow().id(flow_id)
resp = api.prompt(id=template_id, variables=variables)
@@ -40,6 +40,12 @@ def main():
help=f'API URL (default: {default_url})',
)
+ parser.add_argument(
+ '-f', '--flow-id',
+ default="0000",
+ help=f'Flow ID (default: 0000)'
+ )
+
parser.add_argument(
'id',
metavar='template-id',
@@ -54,12 +60,6 @@ def main():
help='''Prompt template terms of the form variable=value, can be
specified multiple times''',
)
-
- # parser.add_argument(
- # '--pulsar-api-key',
- # default=default_pulsar_api_key,
- # help=f'Pulsar API key',
- # )
args = parser.parse_args()
@@ -77,6 +77,7 @@ specified multiple times''',
query(
url=args.url,
+ flow_id=args.flow_id,
template_id=args.id[0],
variables=variables,
)
diff --git a/trustgraph-cli/scripts/tg-load-doc-embeds b/trustgraph-cli/scripts/tg-load-doc-embeds
index d445ec5a..567ccb68 100755
--- a/trustgraph-cli/scripts/tg-load-doc-embeds
+++ b/trustgraph-cli/scripts/tg-load-doc-embeds
@@ -1,8 +1,9 @@
#!/usr/bin/env python3
-"""This utility takes a knowledge core and loads it into a running TrustGraph
-through the API. The knowledge core should be in msgpack format, which is the
-default format produce by tg-save-kg-core.
+"""
+This utility takes a document embeddings core and loads it into a running
+TrustGraph through the API. The document embeddings core should be in msgpack
+format, which is the default format produced by tg-save-doc-embeds.
"""
import aiohttp
@@ -27,7 +28,7 @@ async def load_de(running, queue, url):
async with aiohttp.ClientSession() as session:
- async with session.ws_connect(f"{url}load/document-embeddings") as ws:
+ async with session.ws_connect(url) as ws:
while running.get():
@@ -141,6 +142,9 @@ async def run(running, **args):
# grow to eat all memory
de_q = asyncio.Queue(maxsize=10)
+ url = args["url"]
+ flow_id = args["flow_id"]
+
load_task = asyncio.create_task(
loader(
running=running,
@@ -154,7 +158,8 @@ async def run(running, **args):
de_task = asyncio.create_task(
load_de(
running=running,
- queue=de_q, url=args["url"] + "api/v1/"
+ queue=de_q,
+ url = f"{url}api/v1/flow/{flow_id}/import/document-embeddings"
)
)
@@ -170,7 +175,7 @@ async def run(running, **args):
async def main(running):
parser = argparse.ArgumentParser(
- prog='tg-load-kg-core',
+ prog='tg-load-doc-embeds',
description=__doc__,
)
@@ -184,6 +189,12 @@ async def main(running):
help=f'TrustGraph API URL (default: {default_url})',
)
+ parser.add_argument(
+ '-f', '--flow-id',
+ default="0000",
+ help=f'Flow ID (default: 0000)'
+ )
+
parser.add_argument(
'-i', '--input-file',
# Make it mandatory, difficult to over-write an existing file
diff --git a/trustgraph-cli/scripts/tg-load-kg-core b/trustgraph-cli/scripts/tg-load-kg-core
index b79ec992..2ecdc588 100755
--- a/trustgraph-cli/scripts/tg-load-kg-core
+++ b/trustgraph-cli/scripts/tg-load-kg-core
@@ -1,291 +1,80 @@
#!/usr/bin/env python3
-"""This utility takes a knowledge core and loads it into a running TrustGraph
-through the API. The knowledge core should be in msgpack format, which is the
-default format produce by tg-save-kg-core.
+"""
+Starts a load operation on a knowledge core which is already stored by
+the knowledge manager. You could load a core with tg-put-kg-core and then
+run this utility.
"""
-import aiohttp
-import asyncio
-import msgpack
-import json
-import sys
import argparse
import os
-import signal
+import tabulate
+from trustgraph.api import Api
+import json
-class Running:
- def __init__(self): self.running = True
- def get(self): return self.running
- def stop(self): self.running = False
+default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/')
+default_flow = "0000"
+default_collection = "default"
-ge_counts = 0
-t_counts = 0
+def load_kg_core(url, user, id, flow, collection):
+    """
+    Ask the knowledge API to start loading a knowledge core.
+
+    url: API URL used to construct the client
+    user: user identifier passed to the API
+    id: knowledge core identifier passed to the API
+    flow: flow identifier passed to the API
+    collection: collection identifier passed to the API
+    """
-async def load_ge(running, queue, url):
+    api = Api(url).knowledge()
-    global ge_counts
+    # The call's result is not used, so it is not bound to a name.
+    api.load_kg_core(
+        user=user, id=id, flow=flow, collection=collection,
+    )
- async with aiohttp.ClientSession() as session:
+def main():
- async with session.ws_connect(f"{url}load/graph-embeddings") as ws:
-
- while running.get():
-
- try:
- msg = await asyncio.wait_for(queue.get(), 1)
-
- # End of load
- if msg is None:
- break
-
- except:
- # Hopefully it's TimeoutError. Annoying to match since
- # it changed in 3.11.
- continue
-
- msg = {
- "metadata": {
- "id": msg["m"]["i"],
- "metadata": msg["m"]["m"],
- "user": msg["m"]["u"],
- "collection": msg["m"]["c"],
- },
- "entities": [
- {
- "entity": ent["e"],
- "vectors": ent["v"],
- }
- for ent in msg["e"]
- ],
- }
-
- try:
- await ws.send_json(msg)
- except Exception as e:
- print(e)
-
- ge_counts += 1
-
-async def load_triples(running, queue, url):
-
- global t_counts
-
- async with aiohttp.ClientSession() as session:
-
- async with session.ws_connect(f"{url}load/triples") as ws:
-
- while running.get():
-
- try:
- msg = await asyncio.wait_for(queue.get(), 1)
-
- # End of load
- if msg is None:
- break
-
- except:
- # Hopefully it's TimeoutError. Annoying to match since
- # it changed in 3.11.
- continue
-
- msg ={
- "metadata": {
- "id": msg["m"]["i"],
- "metadata": msg["m"]["m"],
- "user": msg["m"]["u"],
- "collection": msg["m"]["c"],
- },
- "triples": msg["t"],
- }
-
- try:
- await ws.send_json(msg)
- except Exception as e:
- print(e)
-
- t_counts += 1
-
-async def stats(running):
-
- global t_counts
- global ge_counts
-
- while running.get():
-
- await asyncio.sleep(2)
-
- print(
- f"Graph embeddings: {ge_counts:10d} Triples: {t_counts:10d}"
- )
-
-async def loader(running, ge_queue, t_queue, path, format, user, collection):
-
- if format == "json":
-
- raise RuntimeError("Not implemented")
-
- else:
-
- with open(path, "rb") as f:
-
- unpacker = msgpack.Unpacker(f, raw=False)
-
- while running.get():
-
- try:
- unpacked = unpacker.unpack()
- except:
- break
-
- if user:
- unpacked["metadata"]["user"] = user
-
- if collection:
- unpacked["metadata"]["collection"] = collection
-
- if unpacked[0] == "t":
- qtype = t_queue
- else:
- if unpacked[0] == "ge":
- qtype = ge_queue
-
- while running.get():
-
- try:
- await asyncio.wait_for(qtype.put(unpacked[1]), 0.5)
-
- # Successful put message, move on
- break
-
- except:
- # Hopefully it's TimeoutError. Annoying to match since
- # it changed in 3.11.
- continue
-
- if not running.get(): break
-
- # Put 'None' on end of queue to finish
- while running.get():
-
- try:
- await asyncio.wait_for(t_queue.put(None), 1)
-
- # Successful put message, move on
- break
-
- except:
- # Hopefully it's TimeoutError. Annoying to match since
- # it changed in 3.11.
- continue
-
- # Put 'None' on end of queue to finish
- while running.get():
-
- try:
- await asyncio.wait_for(ge_queue.put(None), 1)
-
- # Successful put message, move on
- break
-
- except:
- # Hopefully it's TimeoutError. Annoying to match since
- # it changed in 3.11.
- continue
-
-async def run(running, **args):
-
- # Maxsize on queues reduces back-pressure so tg-load-kg-core doesn't
- # grow to eat all memory
- ge_q = asyncio.Queue(maxsize=10)
- t_q = asyncio.Queue(maxsize=10)
-
- load_task = asyncio.create_task(
- loader(
- running=running,
- ge_queue=ge_q, t_queue=t_q,
- path=args["input_file"], format=args["format"],
- user=args["user"], collection=args["collection"],
- )
-
- )
-
- ge_task = asyncio.create_task(
- load_ge(
- running=running,
- queue=ge_q, url=args["url"] + "api/v1/"
- )
- )
-
- triples_task = asyncio.create_task(
- load_triples(
- running=running,
- queue=t_q, url=args["url"] + "api/v1/"
- )
- )
-
- stats_task = asyncio.create_task(stats(running))
-
- await triples_task
- await ge_task
-
- running.stop()
-
- await load_task
- await stats_task
-
-async def main(running):
-
parser = argparse.ArgumentParser(
- prog='tg-load-kg-core',
+ prog='tg-load-kg-core',
description=__doc__,
)
- default_url = os.getenv("TRUSTGRAPH_API", "http://localhost:8088/")
- default_user = "trustgraph"
- collection = "default"
-
parser.add_argument(
- '-u', '--url',
+ '-u', '--api-url',
default=default_url,
- help=f'TrustGraph API URL (default: {default_url})',
+ help=f'API URL (default: {default_url})',
)
parser.add_argument(
- '-i', '--input-file',
- # Make it mandatory, difficult to over-write an existing file
+ '-U', '--user',
+ default="trustgraph",
+ help='User ID (default: trustgraph)',
+ )
+
+ parser.add_argument(
+ '--id', '--identifier',
required=True,
- help=f'Output file'
+ help=f'Knowledge core ID',
)
parser.add_argument(
- '--format',
- default="msgpack",
- choices=["msgpack", "json"],
- help=f'Output format (default: msgpack)',
+ '-f', '--flow-id',
+ default=default_flow,
+ help=f'Flow ID (default: {default_flow})',
)
parser.add_argument(
- '--user',
- help=f'User ID to load as (default: from input)'
- )
-
- parser.add_argument(
- '--collection',
- help=f'Collection ID to load as (default: from input)'
+ '-c', '--collection',
+ default=default_collection,
+ help=f'Collection ID (default: {default_collection})',
)
args = parser.parse_args()
- await run(running, **vars(args))
+ try:
-running = Running()
+ load_kg_core(
+ url=args.api_url,
+ user=args.user,
+ id=args.id,
+ flow=args.flow_id,
+ collection=args.collection,
+ )
-def interrupt(sig, frame):
- running.stop()
- print('Interrupt')
+ except Exception as e:
-signal.signal(signal.SIGINT, interrupt)
+ print("Exception:", e, flush=True)
-asyncio.run(main(running))
+main()
diff --git a/trustgraph-cli/scripts/tg-load-pdf b/trustgraph-cli/scripts/tg-load-pdf
index 3e960c67..a9fa4ce1 100755
--- a/trustgraph-cli/scripts/tg-load-pdf
+++ b/trustgraph-cli/scripts/tg-load-pdf
@@ -1,11 +1,12 @@
#!/usr/bin/env python3
"""
-Loads a PDF document into TrustGraph processing.
+Loads a PDF document into TrustGraph processing by directing to
+the pdf-decoder queue.
+Consider using tg-add-library-document to load
+a document, followed by tg-start-library-processing to initiate processing.
"""
-import pulsar
-from pulsar.schema import JsonSchema
import hashlib
import argparse
import os
@@ -27,13 +28,13 @@ class Loader:
def __init__(
self,
url,
+ flow_id,
user,
collection,
metadata,
- pulsar_api_key=None,
):
- self.api = Api(url)
+ self.api = Api(url).flow().id(flow_id)
self.user = user
self.collection = collection
@@ -60,14 +61,15 @@ class Loader:
self.api.load_document(
document=data, id=id, metadata=self.metadata,
-# user=self.user,
-# collection=self.collection,
+ user=self.user,
+ collection=self.collection,
)
print(f"{file}: Loaded successfully.")
except Exception as e:
print(f"{file}: Failed: {str(e)}", flush=True)
+ raise e
def main():
@@ -82,6 +84,12 @@ def main():
help=f'API URL (default: {default_url})',
)
+ parser.add_argument(
+ '-f', '--flow-id',
+ default="0000",
+ help=f'Flow ID (default: 0000)'
+ )
+
parser.add_argument(
'-U', '--user',
default=default_user,
@@ -149,53 +157,46 @@ def main():
args = parser.parse_args()
- while True:
+ try:
- try:
+ document = DigitalDocument(
+ id,
+ name=args.name,
+ description=args.description,
+ copyright_notice=args.copyright_notice,
+ copyright_holder=args.copyright_holder,
+ copyright_year=args.copyright_year,
+ license=args.license,
+ url=args.document_url,
+ keywords=args.keyword,
+ )
- document = DigitalDocument(
- id,
- name=args.name,
- description=args.description,
- copyright_notice=args.copyright_notice,
- copyright_holder=args.copyright_holder,
- copyright_year=args.copyright_year,
- license=args.license,
- url=args.document_url,
- keywords=args.keyword,
+ if args.publication_organization:
+ org = Organization(
+ id=to_uri(PREF_ORG, hash(args.publication_organization)),
+ name=args.publication_organization,
+ )
+ document.publication = PublicationEvent(
+ id = to_uri(PREF_PUBEV, str(uuid.uuid4())),
+ organization=org,
+ description=args.publication_description,
+ start_date=args.publication_date,
+ end_date=args.publication_date,
)
- if args.publication_organization:
- org = Organization(
- id=to_uri(PREF_ORG, hash(args.publication_organization)),
- name=args.publication_organization,
- )
- document.publication = PublicationEvent(
- id = to_uri(PREF_PUBEV, str(uuid.uuid4())),
- organization=org,
- description=args.publication_description,
- start_date=args.publication_date,
- end_date=args.publication_date,
- )
+ p = Loader(
+ url=args.url,
+ flow_id = args.flow_id,
+ user=args.user,
+ collection=args.collection,
+ metadata=document,
+ )
- p = Loader(
- url=args.url,
- user=args.user,
- collection=args.collection,
- metadata=document,
- )
+ p.load(args.files)
- p.load(args.files)
+ except Exception as e:
- print("All done.")
- break
-
- except Exception as e:
-
- print("Exception:", e, flush=True)
- print("Will retry...", flush=True)
-
- time.sleep(10)
+ print("Exception:", e, flush=True)
main()
diff --git a/trustgraph-cli/scripts/tg-load-sample-documents b/trustgraph-cli/scripts/tg-load-sample-documents
new file mode 100755
index 00000000..2bbad89f
--- /dev/null
+++ b/trustgraph-cli/scripts/tg-load-sample-documents
@@ -0,0 +1,749 @@
+#!/usr/bin/env python3
+
+"""
+Loads a set of sample PDF documents into the library
+"""
+
+import argparse
+import os
+import uuid
+import datetime
+import requests
+
+from trustgraph.api import Api
+from trustgraph.api.types import hash, Uri, Literal, Triple
+
+default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/')
+default_user = 'trustgraph'
+
+
+from requests.adapters import HTTPAdapter
+from urllib3.response import HTTPResponse
+
+class FileAdapter(HTTPAdapter):
+    """Adapter that answers a request by reading a local file."""
+
+    def send(self, request, *args, **kwargs):
+        """Open the file named by the request URL and wrap it in a response.
+
+        The extra positional and keyword arguments are accepted but not used.
+        """
+        # Drop the leading 7 characters (the length of "file://").
+        local_path = request.url[len("file://"):]
+        raw = HTTPResponse(
+            body=open(local_path, 'rb'),
+            status=200,
+            preload_content=False,
+        )
+        return self.build_response(request, raw)
+
+# Module-level requests session.  requests.session() is only kept for
+# backwards compatibility; Session() is the supported constructor.
+session = requests.Session()
+
+# Route file:// URLs on this session through FileAdapter.
+session.mount('file://', FileAdapter())
+
+# Best-effort creation of the doc-cache directory.  Filesystem errors (for
+# example the directory already existing) are still ignored, but a bare
+# except would also have swallowed KeyboardInterrupt and SystemExit.
+try:
+    os.mkdir("doc-cache")
+except OSError:
+    pass
+
+documents = [
+
+ {
+ "id": "https://trustgraph.ai/doc/challenger-report-vol-1",
+ "title": "Report of the Presidential Commission on the Space Shuttle Challenger Accident, Volume 1",
+ "comments": "The findings of the Commission regarding the circumstances surrounding the Challenger accident are reported and recommendations for corrective action are outlined",
+# "url": "https://ntrs.nasa.gov/api/citations/19860015255/downloads/19860015255.pdf",
+ "url": "file://19860015255.pdf",
+ "kind": "application/pdf",
+ "date": datetime.datetime.now().date(),
+ "tags": ["nasa", "safety-engineering", "space-shuttle"],
+ "metadata": [
+ Triple(
+ s = Uri("https://trustgraph.ai/doc/challenger-report-vol-1"),
+ p = Uri("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
+ o = Uri("https://schema.org/DigitalDocument")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/doc/challenger-report-vol-1"),
+ p = Uri("http://www.w3.org/2000/01/rdf-schema#label"),
+ o = Literal("Report of the Presidential Commission on the Space Shuttle Challenger Accident, Volume 1")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/doc/challenger-report-vol-1"),
+ p = Uri("https://schema.org/name"),
+ o = Literal("Report of the Presidential Commission on the Space Shuttle Challenger Accident, Volume 1")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/doc/challenger-report-vol-1"),
+ p = Uri("https://schema.org/description"),
+ o = Literal("The findings of the Commission regarding the circumstances surrounding the Challenger accident are reported and recommendations for corrective action are outlined")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/doc/challenger-report-vol-1"),
+ p = Uri("https://schema.org/copyrightNotice"),
+ o = Literal("Work of the US Gov. Public Use Permitted")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/doc/challenger-report-vol-1"),
+ p = Uri("https://schema.org/copyrightHolder"),
+ o = Literal("US Gov.")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/doc/challenger-report-vol-1"),
+ p = Uri("https://schema.org/copyrightYear"),
+ o = Literal("1986")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/doc/challenger-report-vol-1"),
+ p = Uri("https://schema.org/keywords"),
+ o = Literal("nasa")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/doc/challenger-report-vol-1"),
+ p = Uri("https://schema.org/keywords"),
+ o = Literal("space-shuttle")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/doc/challenger-report-vol-1"),
+ p = Uri("https://schema.org/keywords"),
+ o = Literal("safety-engineering")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/doc/challenger-report-vol-1"),
+ p = Uri("https://schema.org/keywords"),
+ o = Literal("challenger")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/doc/challenger-report-vol-1"),
+ p = Uri("https://schema.org/keywords"),
+ o = Literal("space-transportation")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/doc/challenger-report-vol-1"),
+ p = Uri("https://schema.org/publication"),
+ o = Uri("https://trustgraph.ai/pubev/d946c320-0432-48c8-a015-26b0af3cedae")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/pubev/d946c320-0432-48c8-a015-26b0af3cedae"),
+ p = Uri("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
+ o = Uri("https://schema.org/PublicationEvent")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/pubev/d946c320-0432-48c8-a015-26b0af3cedae"),
+ p = Uri("https://schema.org/description"),
+ o = Literal("The findings of the Commission regarding the circumstances surrounding the Challenger accident are reported and recommendations for corrective action are outlined")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/pubev/d946c320-0432-48c8-a015-26b0af3cedae"),
+ p = Uri("https://schema.org/publishedBy"),
+ o = Uri("https://trustgraph.ai/org/nasa")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/org/nasa"),
+ p = Uri("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
+ o = Uri("https://schema.org/Organization")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/org/nasa"),
+ p = Uri("http://www.w3.org/2000/01/rdf-schema#label"),
+ o = Literal("NASA")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/org/nasa"),
+ p = Uri("https://schema.org/name"),
+ o = Literal("NASA")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/pubev/d946c320-0432-48c8-a015-26b0af3cedae"),
+ p = Uri("https://schema.org/startDate"),
+ o = Literal("1986-06-06")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/pubev/d946c320-0432-48c8-a015-26b0af3cedae"),
+ p = Uri("https://schema.org/endDate"),
+ o = Literal("1986-06-06")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/doc/challenger-report-vol-1"),
+ p = Uri("https://schema.org/url"),
+ o = Uri("https://ntrs.nasa.gov/api/citations/19860015255/downloads/19860015255.pdf")
+ )
+ ]
+ },
+
+ {
+ "id": "https://trustgraph.ai/doc/icelandic-dictionary",
+ "title": "A Concise Dictionary of Old Icelandic",
+ "comments": "A Concise Dictionary of Old Icelandic, published in 1910, is a 551-page dictionary that offers a comprehensive overview of the Old Norse language, particularly Old Icelandic.",
+ "url": "https://css4.pub/2015/icelandic/dictionary.pdf",
+ "kind": "application/pdf",
+ "date": datetime.datetime.now().date(),
+ "tags": ["old-icelandic", "dictionary", "language", "grammar", "old-norse", "icelandic"],
+ "metadata": [
+ Triple(
+ s = Uri("https://trustgraph.ai/doc/icelandic-dictionary"),
+ p = Uri("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
+ o = Uri("https://schema.org/DigitalDocument")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/doc/icelandic-dictionary"),
+ p = Uri("http://www.w3.org/2000/01/rdf-schema#label"),
+ o = Literal("A Concise Dictionary of Old Icelandic"),
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/doc/icelandic-dictionary"),
+ p = Uri("https://schema.org/name"),
+ o = Literal("A Concise Dictionary of Old Icelandic"),
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/doc/icelandic-dictionary"),
+ p = Uri("https://schema.org/description"),
+ o = Literal("A Concise Dictionary of Old Icelandic, published in 1910, is a 551-page dictionary that offers a comprehensive overview of the Old Norse language, particularly Old Icelandic."),
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/doc/icelandic-dictionary"),
+ p = Uri("https://schema.org/copyrightNotice"),
+ o = Literal("Copyright expired, public domain")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/doc/icelandic-dictionary"),
+ p = Uri("https://schema.org/copyrightHolder"),
+ o = Literal("Geir Zoëga, Clarendon Press")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/doc/icelandic-dictionary"),
+ p = Uri("https://schema.org/copyrightYear"),
+ o = Literal("1910")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/doc/icelandic-dictionary"),
+ p = Uri("https://schema.org/keywords"),
+ o = Literal("icelandic")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/doc/icelandic-dictionary"),
+ p = Uri("https://schema.org/keywords"),
+ o = Literal("old-norse")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/doc/icelandic-dictionary"),
+ p = Uri("https://schema.org/keywords"),
+ o = Literal("dictionary")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/doc/icelandic-dictionary"),
+ p = Uri("https://schema.org/keywords"),
+ o = Literal("grammar")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/doc/icelandic-dictionary"),
+ p = Uri("https://schema.org/keywords"),
+ o = Literal("old-icelandic")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/doc/icelandic-dictionary"),
+ p = Uri("https://schema.org/publication"),
+ o = Uri("https://trustgraph.ai/pubev/11a78156-3aea-4263-9f1b-0c63cbde69d7")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/pubev/11a78156-3aea-4263-9f1b-0c63cbde69d7"),
+ p = Uri("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
+ o = Uri("https://schema.org/PublicationEvent")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/pubev/11a78156-3aea-4263-9f1b-0c63cbde69d7"),
+ p = Uri("https://schema.org/description"),
+ o = Literal("Published by Clarendon Press in 1910"),
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/pubev/11a78156-3aea-4263-9f1b-0c63cbde69d7"),
+ p = Uri("https://schema.org/publishedBy"),
+ o = Uri("https://trustgraph.ai/org/clarendon-press")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/org/clarendon-press"),
+ p = Uri("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
+ o = Uri("https://schema.org/Organization")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/org/clarendon-press"),
+ p = Uri("http://www.w3.org/2000/01/rdf-schema#label"),
+ o = Literal("Clarendon Press")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/org/clarendon-press"),
+ p = Uri("https://schema.org/name"),
+ o = Literal("Clarendon Press")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/pubev/11a78156-3aea-4263-9f1b-0c63cbde69d7"),
+ p = Uri("https://schema.org/startDate"),
+ o = Literal("1910-01-01")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/pubev/11a78156-3aea-4263-9f1b-0c63cbde69d7"),
+ p = Uri("https://schema.org/endDate"),
+ o = Literal("1910-01-01")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/doc/icelandic-dictionary"),
+ p = Uri("https://schema.org/url"),
+ o = Uri("https://digital-research-books-beta.nypl.org/edition/10476341")
+ )
+ ]
+ },
+
+
+ {
+ "id": "https://trustgraph.ai/doc/annual-threat-assessment-us-dni-march-2025",
+ "title": "Annual threat assessment of the U.S. intelligence community - March 2025",
+ "comments": "The report reflects the collective insights of the Intelligence Community (IC), which is committed to providing the nuanced, independent, and unvarnished intelligence that policymakers, warfighters, and domestic law enforcement personnel need to protect American lives and America’s interests anywhere in the world.",
+ "url": "https://www.intelligence.senate.gov/sites/default/files/2025%20Annual%20Threat%20Assessment%20of%20the%20U.S.%20Intelligence%20Community.pdf",
+ "kind": "application/pdf",
+ "date": datetime.datetime.now().date(),
+ "tags": ["adversary-cooperation", "cyberthreats", "supply-chain-vulnerabilities", "economic-competition", "national-security", "data-privacy"],
+ "metadata": [
+ Triple(
+ s = Uri("https://trustgraph.ai/doc/annual-threat-assessment-us-dni-march-2025"),
+ p = Uri("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
+ o = Uri("https://schema.org/DigitalDocument")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/doc/annual-threat-assessment-us-dni-march-2025"),
+ p = Uri("http://www.w3.org/2000/01/rdf-schema#label"),
+ o = Literal("Annual threat assessment of the U.S. intelligence community - March 2025"),
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/doc/annual-threat-assessment-us-dni-march-2025"),
+ p = Uri("https://schema.org/name"),
+ o = Literal("Annual threat assessment of the U.S. intelligence community - March 2025"),
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/doc/annual-threat-assessment-us-dni-march-2025"),
+ p = Uri("https://schema.org/description"),
+ o = Literal("The report reflects the collective insights of the Intelligence Community (IC), which is committed to providing the nuanced, independent, and unvarnished intelligence that policymakers, warfighters, and domestic law enforcement personnel need to protect American lives and America’s interests anywhere in the world."),
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/doc/annual-threat-assessment-us-dni-march-2025"),
+ p = Uri("https://schema.org/copyrightNotice"),
+ o = Literal("Not copyright")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/doc/annual-threat-assessment-us-dni-march-2025"),
+ p = Uri("https://schema.org/copyrightHolder"),
+ o = Literal("US Government")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/doc/annual-threat-assessment-us-dni-march-2025"),
+ p = Uri("https://schema.org/copyrightYear"),
+ o = Literal("2025")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/doc/annual-threat-assessment-us-dni-march-2025"),
+ p = Uri("https://schema.org/keywords"),
+ o = Literal("adversary-cooperation")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/doc/annual-threat-assessment-us-dni-march-2025"),
+ p = Uri("https://schema.org/keywords"),
+ o = Literal("cyberthreats")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/doc/annual-threat-assessment-us-dni-march-2025"),
+ p = Uri("https://schema.org/keywords"),
+ o = Literal("supply-chain-vulnerabilities")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/doc/annual-threat-assessment-us-dni-march-2025"),
+ p = Uri("https://schema.org/keywords"),
+ o = Literal("economic-competition")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/doc/annual-threat-assessment-us-dni-march-2025"),
+ p = Uri("https://schema.org/keywords"),
+ o = Literal("national-security")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/doc/annual-threat-assessment-us-dni-march-2025"),
+ p = Uri("https://schema.org/publication"),
+ o = Uri("https://trustgraph.ai/pubev/0f1cfbe2-ce64-403b-8327-799aa8ba3cec")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/pubev/0f1cfbe2-ce64-403b-8327-799aa8ba3cec"),
+ p = Uri("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
+ o = Uri("https://schema.org/PublicationEvent")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/pubev/0f1cfbe2-ce64-403b-8327-799aa8ba3cec"),
+ p = Uri("https://schema.org/description"),
+ o = Literal("Published by the Director of National Intelligence (DNI)"),
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/pubev/0f1cfbe2-ce64-403b-8327-799aa8ba3cec"),
+ p = Uri("https://schema.org/publishedBy"),
+ o = Uri("https://trustgraph.ai/org/us-gov-dni")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/org/us-gov-dni"),
+ p = Uri("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
+ o = Uri("https://schema.org/Organization")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/org/us-gov-dni"),
+ p = Uri("http://www.w3.org/2000/01/rdf-schema#label"),
+ o = Literal("The Director of National Intelligence")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/org/us-gov-dni"),
+ p = Uri("https://schema.org/name"),
+ o = Literal("The Director of National Intelligence")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/pubev/0f1cfbe2-ce64-403b-8327-799aa8ba3cec"),
+ p = Uri("https://schema.org/startDate"),
+ o = Literal("2025-03-18")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/pubev/0f1cfbe2-ce64-403b-8327-799aa8ba3cec"),
+ p = Uri("https://schema.org/endDate"),
+ o = Literal("2025-03-18")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/doc/annual-threat-assessment-us-dni-march-2025"),
+ p = Uri("https://schema.org/url"),
+ o = Uri("https://www.dni.gov/index.php/newsroom/reports-publications/reports-publications-2025/4058-2025-annual-threat-assessment")
+ )
+ ]
+ },
+
+ {
+ "id": "https://trustgraph.ai/doc/intelligence-and-state",
+ "title": "The Role of Intelligence and State Policies in International Security",
+ "comments": "A volume by Mehmet Emin Erendor, published by Cambridge Scholars Publishing (2021). It is well-known that the understanding of security has changed since the end of the Cold War. This, in turn, has impacted the characteristics of intelligence, as states have needed to improve their security policies with new intelligence tactics. This volume investigates this new state of play in the international arena.",
+ "url": "https://www.cambridgescholars.com/resources/pdfs/978-1-5275-7604-9-sample.pdf",
+ "kind": "application/pdf",
+ "date": "2025-05-06",
+ "tags": ["intelligence", "state-policy", "international-security", "national-security", "geopolitics", "foreign-policy", "security-studies", "military", "crime"],
+ "metadata": [
+ Triple(
+ s = Uri("https://trustgraph.ai/doc/intelligence-and-state"),
+ p = Uri("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
+ o = Uri("https://schema.org/Book")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/doc/intelligence-and-state"),
+ p = Uri("http://www.w3.org/2000/01/rdf-schema#label"),
+ o = Literal("The Role of Intelligence and State Policies in International Security")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/doc/intelligence-and-state"),
+ p = Uri("https://schema.org/name"),
+ o = Literal("The Role of Intelligence and State Policies in International Security")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/doc/intelligence-and-state"),
+ p = Uri("https://schema.org/description"),
+ o = Literal("A volume by Mehmet Emin Erendor. It is well-known that the understanding of security has changed since the end of the Cold War. This, in turn, has impacted the characteristics of intelligence, as states have needed to improve their security policies with new intelligence tactics. This volume investigates this new state of play in the international arena.")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/doc/intelligence-and-state"),
+ p = Uri("https://schema.org/author"),
+ o = Literal("Mehmet Emin Erendor")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/doc/intelligence-and-state"),
+ p = Uri("https://schema.org/isbn"),
+ o = Literal("9781527576049")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/doc/intelligence-and-state"),
+ p = Uri("https://schema.org/numberOfPages"),
+ o = Literal("220")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/doc/intelligence-and-state"),
+ p = Uri("https://schema.org/keywords"),
+ o = Literal("intelligence")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/doc/intelligence-and-state"),
+ p = Uri("https://schema.org/keywords"),
+ o = Literal("state policy")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/doc/intelligence-and-state"),
+ p = Uri("https://schema.org/keywords"),
+ o = Literal("international security")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/doc/intelligence-and-state"),
+ p = Uri("https://schema.org/keywords"),
+ o = Literal("national security")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/doc/intelligence-and-state"),
+ p = Uri("https://schema.org/keywords"),
+ o = Literal("geopolitics")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/doc/intelligence-and-state"),
+ p = Uri("https://schema.org/publication"),
+ o = Uri("https://trustgraph.ai/pubev/b4352222-5da0-480d-a00f-f7342fe77862")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/pubev/b4352222-5da0-480d-a00f-f7342fe77862"),
+ p = Uri("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
+ o = Uri("https://schema.org/PublicationEvent")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/pubev/b4352222-5da0-480d-a00f-f7342fe77862"),
+ p = Uri("https://schema.org/description"),
+ o = Literal("Published by Cambridge Scholars Publishing on October 28, 2021.")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/pubev/b4352222-5da0-480d-a00f-f7342fe77862"),
+ p = Uri("https://schema.org/publishedBy"),
+ o = Uri("https://trustgraph.ai/org/cambridge-scholars-publishing")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/org/cambridge-scholars-publishing"),
+ p = Uri("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
+ o = Uri("https://schema.org/Organization")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/org/cambridge-scholars-publishing"),
+ p = Uri("http://www.w3.org/2000/01/rdf-schema#label"),
+ o = Literal("Cambridge Scholars Publishing")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/org/cambridge-scholars-publishing"),
+ p = Uri("https://schema.org/name"),
+ o = Literal("Cambridge Scholars Publishing")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/pubev/b4352222-5da0-480d-a00f-f7342fe77862"),
+ p = Uri("https://schema.org/startDate"),
+ o = Literal("2021-10-28")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/doc/intelligence-and-state"),
+ p = Uri("https://schema.org/url"),
+ o = Uri("https://www.cambridgescholars.com/resources/pdfs/978-1-5275-7604-9-sample.pdf")
+ )
+ ]
+ },
+
+ {
+ "id": "https://trustgraph.ai/doc/beyond-vigilant-state",
+ "title": "Beyond the vigilant state: globalisation and intelligence",
+ "comments": "This academic paper by Richard J. Aldrich examines the relationship between globalization and intelligence agencies, discussing how intelligence services have adapted to global changes in the post-Cold War era.",
+ "url": "https://warwick.ac.uk/fac/soc/pais/people/aldrich/publications/beyond.pdf",
+ "kind": "application/pdf",
+ "date": datetime.datetime.now().date(),
+ "tags": ["intelligence", "globalization", "security-studies", "surveillance", "international-relations", "post-cold-war"],
+ "metadata": [
+ Triple(
+ s = Uri("https://trustgraph.ai/doc/beyond-vigilant-state"),
+ p = Uri("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
+ o = Uri("https://schema.org/ScholarlyArticle")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/doc/beyond-vigilant-state"),
+ p = Uri("http://www.w3.org/2000/01/rdf-schema#label"),
+ o = Literal("Beyond the vigilant state: globalisation and intelligence"),
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/doc/beyond-vigilant-state"),
+ p = Uri("https://schema.org/name"),
+ o = Literal("Beyond the vigilant state: globalisation and intelligence"),
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/doc/beyond-vigilant-state"),
+ p = Uri("https://schema.org/description"),
+ o = Literal("This academic paper by Richard J. Aldrich examines the relationship between globalization and intelligence agencies, discussing how intelligence services have adapted to global changes in the post-Cold War era."),
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/doc/beyond-vigilant-state"),
+ p = Uri("https://schema.org/copyrightNotice"),
+ o = Literal("(c) British International Studies Association")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/doc/beyond-vigilant-state"),
+ p = Uri("https://schema.org/copyrightHolder"),
+ o = Literal("British International Studies Association")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/doc/beyond-vigilant-state"),
+ p = Uri("https://schema.org/author"),
+ o = Uri("https://trustgraph.ai/person/3a45f8c9-b7d1-42e5-8631-d9f82c4a0e22")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/person/3a45f8c9-b7d1-42e5-8631-d9f82c4a0e22"),
+ p = Uri("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
+ o = Uri("https://schema.org/Person")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/person/3a45f8c9-b7d1-42e5-8631-d9f82c4a0e22"),
+ p = Uri("http://www.w3.org/2000/01/rdf-schema#label"),
+ o = Literal("Richard J. Aldrich")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/person/3a45f8c9-b7d1-42e5-8631-d9f82c4a0e22"),
+ p = Uri("https://schema.org/name"),
+ o = Literal("Richard J. Aldrich")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/doc/beyond-vigilant-state"),
+ p = Uri("https://schema.org/keywords"),
+ o = Literal("intelligence")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/doc/beyond-vigilant-state"),
+ p = Uri("https://schema.org/keywords"),
+ o = Literal("globalisation")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/doc/beyond-vigilant-state"),
+ p = Uri("https://schema.org/keywords"),
+ o = Literal("security-studies")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/doc/beyond-vigilant-state"),
+ p = Uri("https://schema.org/keywords"),
+ o = Literal("surveillance")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/doc/beyond-vigilant-state"),
+ p = Uri("https://schema.org/keywords"),
+ o = Literal("international-relations")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/doc/beyond-vigilant-state"),
+ p = Uri("https://schema.org/keywords"),
+ o = Literal("post-cold-war")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/doc/beyond-vigilant-state"),
+ p = Uri("https://schema.org/publication"),
+ o = Uri("https://trustgraph.ai/pubev/75c83dfa-6b2e-4d89-bda1-c8e92f0e3410")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/pubev/75c83dfa-6b2e-4d89-bda1-c8e92f0e3410"),
+ p = Uri("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
+ o = Uri("https://schema.org/PublicationEvent")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/pubev/75c83dfa-6b2e-4d89-bda1-c8e92f0e3410"),
+ p = Uri("https://schema.org/description"),
+ o = Literal("Published in Review of International Studies"),
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/pubev/75c83dfa-6b2e-4d89-bda1-c8e92f0e3410"),
+ p = Uri("https://schema.org/publishedBy"),
+ o = Uri("https://trustgraph.ai/org/british-international-studies-association")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/org/british-international-studies-association"),
+ p = Uri("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
+ o = Uri("https://schema.org/Organization")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/org/british-international-studies-association"),
+ p = Uri("http://www.w3.org/2000/01/rdf-schema#label"),
+ o = Literal("British International Studies Association")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/org/british-international-studies-association"),
+ p = Uri("https://schema.org/name"),
+ o = Literal("British International Studies Association")
+ ),
+ Triple(
+ s = Uri("https://trustgraph.ai/doc/beyond-vigilant-state"),
+ p = Uri("https://schema.org/url"),
+ o = Uri("https://warwick.ac.uk/fac/soc/pais/people/aldrich/publications/beyond.pdf")
+ )
+ ]
+ }
+
+]
+
+class Loader:
+    """Fetch documents through a local file cache and add them to the library."""
+
+    def __init__(self, url, user):
+        """Bind to the library API at `url`; `user` is sent with each document."""
+        self.api = Api(url).library()
+        self.user = user
+
+    def load(self, documents):
+        """Load every document description in `documents`, in order."""
+        for doc in documents:
+            self.load_doc(doc)
+
+    def load_doc(self, doc):
+        """Obtain one document's content and add it to the library.
+
+        `doc` provides id, title, comments, url, kind, tags and metadata.  The
+        content is cached under doc-cache/.  Failures are printed and re-raised.
+        """
+        import hashlib
+        try:
+            print(doc["title"], ":")
+            # hash() of a str is salted per process; a digest gives a stable name.
+            hid = hashlib.sha256(doc["url"].encode("utf-8")).hexdigest()
+            os.makedirs("doc-cache", exist_ok=True)
+            cache_file = f"doc-cache/{hid}"
+            if os.path.isfile(cache_file):
+                print(" (use cache file)")
+                with open(cache_file, "rb") as f:
+                    content = f.read()
+            else:
+                print(" downloading...")
+                resp = session.get(doc["url"])
+                content = resp.content
+                with open(cache_file, "wb") as f:
+                    f.write(content)
+                print(" done.")
+            print(" adding...")
+            self.api.add_document(
+                id=doc["id"], metadata=doc["metadata"],
+                user=self.user, kind=doc["kind"], title=doc["title"],
+                comments=doc["comments"], tags=doc["tags"], document=content,
+            )
+            print(" successful.")
+        except Exception as e:
+            print(f"Failed: {str(e)}", flush=True)
+            raise
+
+def main():
+    """Parse the command line, then load all of `documents` with a Loader.
+
+    A failure is printed and then re-raised.
+    """
+
+    cli = argparse.ArgumentParser(
+        prog='tg-add-library-document', description=__doc__,
+    )
+
+    # API endpoint handed to the Loader
+    cli.add_argument(
+        '-u', '--url', default=default_url,
+        help=f'API URL (default: {default_url})',
+    )
+
+    # User ID handed to the Loader
+    cli.add_argument(
+        '-U', '--user', default=default_user,
+        help=f'User ID (default: {default_user})',
+    )
+
+    options = cli.parse_args()
+
+    try:
+
+        loader = Loader(url=options.url, user=options.user)
+
+        loader.load(documents)
+
+    except Exception as err:
+
+        print("Exception:", err, flush=True)
+        raise err
+
+main()
+
+
+
+
+# https://warwick.ac.uk/fac/soc/pais/people/aldrich/publications/beyond.pdf
+
+# https://www.ialeia.org/docs/Psychology_of_Intelligence_Analysis.pdf
+
diff --git a/trustgraph-cli/scripts/tg-load-text b/trustgraph-cli/scripts/tg-load-text
index 0cc221a5..d00548ad 100755
--- a/trustgraph-cli/scripts/tg-load-text
+++ b/trustgraph-cli/scripts/tg-load-text
@@ -1,7 +1,10 @@
#!/usr/bin/env python3
"""
-Loads a text document into TrustGraph processing.
+Loads a text document into TrustGraph processing by directing to a text
+loader queue.
+Consider using tg-add-library-document to load
+a document, followed by tg-start-library-processing to initiate processing.
"""
import pulsar
@@ -27,12 +30,13 @@ class Loader:
def __init__(
self,
url,
+ flow_id,
user,
collection,
metadata,
):
- self.api = Api(url)
+ self.api = Api(url).flow().id(flow_id)
self.user = user
self.collection = collection
@@ -59,14 +63,15 @@ class Loader:
self.api.load_text(
text=data, id=id, metadata=self.metadata,
-# user=self.user,
-# collection=self.collection,
+ user=self.user,
+ collection=self.collection,
)
print(f"{file}: Loaded successfully.")
except Exception as e:
print(f"{file}: Failed: {str(e)}", flush=True)
+ raise e
def main():
@@ -80,12 +85,12 @@ def main():
default=default_url,
help=f'API URL (default: {default_url})',
)
-
- # parser.add_argument(
- # '--pulsar-api-key',
- # default=default_pulsar_api_key,
- # help=f'Pulsar API key',
- # )
+
+ parser.add_argument(
+ '-f', '--flow-id',
+ default="0000",
+ help=f'Flow ID (default: 0000)'
+ )
parser.add_argument(
'-U', '--user',
@@ -154,53 +159,50 @@ def main():
args = parser.parse_args()
- while True:
- try:
+ try:
- document = DigitalDocument(
- id,
- name=args.name,
- description=args.description,
- copyright_notice=args.copyright_notice,
- copyright_holder=args.copyright_holder,
- copyright_year=args.copyright_year,
- license=args.license,
- url=args.document_url,
- keywords=args.keyword,
+ document = DigitalDocument(
+ id,
+ name=args.name,
+ description=args.description,
+ copyright_notice=args.copyright_notice,
+ copyright_holder=args.copyright_holder,
+ copyright_year=args.copyright_year,
+ license=args.license,
+ url=args.document_url,
+ keywords=args.keyword,
+ )
+
+ if args.publication_organization:
+ org = Organization(
+ id=to_uri(PREF_ORG, hash(args.publication_organization)),
+ name=args.publication_organization,
+ )
+ document.publication = PublicationEvent(
+ id = to_uri(PREF_PUBEV, str(uuid.uuid4())),
+ organization=org,
+ description=args.publication_description,
+ start_date=args.publication_date,
+ end_date=args.publication_date,
)
- if args.publication_organization:
- org = Organization(
- id=to_uri(PREF_ORG, hash(args.publication_organization)),
- name=args.publication_organization,
- )
- document.publication = PublicationEvent(
- id = to_uri(PREF_PUBEV, str(uuid.uuid4())),
- organization=org,
- description=args.publication_description,
- start_date=args.publication_date,
- end_date=args.publication_date,
- )
+ p = Loader(
+ url = args.url,
+ flow_id = args.flow_id,
+ user = args.user,
+ collection = args.collection,
+ metadata = document,
+ )
- p = Loader(
- url=args.url,
- user=args.user,
- collection=args.collection,
- metadata=document,
- )
+ p.load(args.files)
- p.load(args.files)
+ print("All done.")
- print("All done.")
- break
+ except Exception as e:
- except Exception as e:
-
- print("Exception:", e, flush=True)
- print("Will retry...", flush=True)
-
- time.sleep(10)
+ print("Exception:", e, flush=True)
main()
+
diff --git a/trustgraph-cli/scripts/tg-load-turtle b/trustgraph-cli/scripts/tg-load-turtle
index 3417a87d..3cf24a7d 100755
--- a/trustgraph-cli/scripts/tg-load-turtle
+++ b/trustgraph-cli/scripts/tg-load-turtle
@@ -2,12 +2,13 @@
"""
Loads Graph embeddings into TrustGraph processing.
+
+FIXME: This hasn't been updated following API gateway change.
"""
import pulsar
from pulsar.schema import JsonSchema
from trustgraph.schema import Triples, Triple, Value, Metadata
-from trustgraph.schema import triples_store_queue
import argparse
import os
import time
@@ -109,6 +110,12 @@ def main():
default=default_pulsar_host,
help=f'Pulsar host (default: {default_pulsar_host})',
)
+
+ parser.add_argument(
+ '-f', '--flow-id',
+ default="0000",
+ help=f'Flow ID (default: 0000)'
+ )
parser.add_argument(
'--pulsar-api-key',
@@ -174,5 +181,6 @@ def main():
time.sleep(10)
-main()
+print("Not implemented.")
+#main()
diff --git a/trustgraph-cli/scripts/tg-processor-state b/trustgraph-cli/scripts/tg-processor-state
deleted file mode 100755
index cfab00c8..00000000
--- a/trustgraph-cli/scripts/tg-processor-state
+++ /dev/null
@@ -1,59 +0,0 @@
-#!/usr/bin/env python3
-
-"""
-Dump out TrustGraph processor states.
-"""
-
-import requests
-import argparse
-import tabulate
-
-default_prometheus_url = "http://localhost:9090"
-
-def dump_status(prom):
-
- url = f"{prom}/api/v1/query?query=processor_state%7Bprocessor_state%3D%22running%22%7D"
-
- resp = requests.get(url)
-
- obj = resp.json()
-
- tbl = [
- [
- m["metric"]["job"],
- "running" if int(m["value"][1]) > 0 else "down"
- ]
- for m in obj["data"]["result"]
- ]
-
- print(tabulate.tabulate(
- tbl, tablefmt="pretty", headers=["processor", "state"],
- stralign="left"
- ))
-
-
-def main():
-
- parser = argparse.ArgumentParser(
- prog='tg-processor-state',
- description=__doc__,
- )
-
- parser.add_argument(
- '-p', '--prometheus-url',
- default=default_prometheus_url,
- help=f'Prometheus URL (default: {default_prometheus_url})',
- )
-
- args = parser.parse_args()
-
- try:
-
- dump_status(args.prometheus_url)
-
- except Exception as e:
-
- print("Exception:", e, flush=True)
-
-main()
-
diff --git a/trustgraph-cli/scripts/tg-put-flow-class b/trustgraph-cli/scripts/tg-put-flow-class
new file mode 100755
index 00000000..74c29bf3
--- /dev/null
+++ b/trustgraph-cli/scripts/tg-put-flow-class
@@ -0,0 +1,59 @@
+#!/usr/bin/env python3
+
+"""
+Uploads a flow class definition. You can take the output of
+tg-get-flow-class and load it back in using this utility.
+"""
+
+import argparse
+import os
+from trustgraph.api import Api
+import json
+
+default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/')
+
+def put_flow_class(url, class_name, config):
+    """Upload `config` as the flow class `class_name` via the API at `url`."""
+
+    # The value returned by put_class() is not needed here, so it is not kept.
+    Api(url).flow().put_class(class_name, config)
+
+def main():
+    """Parse the command line and upload the flow class; errors are printed."""
+
+    parser = argparse.ArgumentParser(
+        prog='tg-put-flow-class',
+        description=__doc__,
+    )
+
+    parser.add_argument(
+        '-u', '--api-url',
+        default=default_url,
+        help=f'API URL (default: {default_url})',
+    )
+
+    # Both options below are mandatory: without --config, json.loads(None)
+    # fails with an unhelpful TypeError, so argparse reports the omission.
+    parser.add_argument(
+        '-n', '--class-name', required=True,
+        help='Flow class name',
+    )
+
+    parser.add_argument(
+        '-c', '--config', required=True,
+        help='Initial configuration to load, should be raw JSON',
+    )
+
+    args = parser.parse_args()
+
+    try:
+        put_flow_class(
+            url=args.api_url,
+            class_name=args.class_name,
+            config=json.loads(args.config),
+        )
+    except Exception as e:
+        print("Exception:", e, flush=True)
+
+main()
+
diff --git a/trustgraph-cli/scripts/tg-put-kg-core b/trustgraph-cli/scripts/tg-put-kg-core
new file mode 100755
index 00000000..1184d6f7
--- /dev/null
+++ b/trustgraph-cli/scripts/tg-put-kg-core
@@ -0,0 +1,183 @@
+#!/usr/bin/env python3
+
+"""
+Loads a knowledge core from a msgpack file into the knowledge service.
+"""
+
+import argparse
+import os
+import textwrap
+import uuid
+import asyncio
+import json
+from websockets.asyncio.client import connect
+import msgpack
+
+default_url = os.getenv("TRUSTGRAPH_URL", 'ws://localhost:8088/')
+default_user = 'trustgraph'
+
+def read_message(unpacked, id, user):
+    """Convert one unpacked knowledge-core record into a request payload.
+
+    `unpacked[0]` is a type tag ("ge" for graph embeddings, "t" for
+    triples) and `unpacked[1]` the record, whose compact keys ("m", "e",
+    "v", "t") are expanded here.  The metadata is rebuilt with the
+    supplied `id` and `user`.
+
+    Returns a (tag, payload) tuple.  Raises RuntimeError for any other tag.
+    """
+
+    tag = unpacked[0]
+
+    if tag not in ("ge", "t"):
+        raise RuntimeError("Unpacked unexpected message type", tag)
+
+    record = unpacked[1]
+
+    metadata = {
+        "id": id,
+        "metadata": record["m"]["m"],
+        "user": user,
+        "collection": "default",  # Not used by receiver?
+    }
+
+    if tag == "t":
+        return "t", {"metadata": metadata, "triples": record["t"]}
+
+    entities = [
+        {"entity": ent["e"], "vectors": ent["v"]} for ent in record["e"]
+    ]
+    return "ge", {"metadata": metadata, "entities": entities}
+
+async def put(url, user, id, input):
+    """Stream a msgpack knowledge-core file to the knowledge service.
+
+    Connects to the websocket at <url>api/v1/socket and sends every record
+    read from `input` as a "put-kg-core" request, waiting for the matching
+    response before sending the next record.
+
+    Args:
+        url: Base URL of the API; a trailing "/" is added if missing.
+        user: User ID placed in each request and passed to read_message().
+        id: Knowledge core ID, used the same way as `user`.
+        input: Path of the msgpack file to read.
+
+    Raises:
+        RuntimeError: A response carried an error, or a record had an
+            unexpected kind.
+    """
+
+    if not url.endswith("/"):
+        url += "/"
+
+    url = url + "api/v1/socket"
+
+    # Request field that carries the payload for each record kind
+    payload_field = {"ge": "graph-embeddings", "t": "triples"}
+
+    # Per-kind message counters, reported at the end
+    counts = {"ge": 0, "t": 0}
+
+    async with connect(url) as ws:
+
+        with open(input, "rb") as f:
+
+            # Iterating the Unpacker stops cleanly at end of data, while
+            # malformed input still raises.  A bare `except` around unpack()
+            # would instead treat every error as end of file.
+            for unpacked in msgpack.Unpacker(f, raw=False):
+
+                kind, payload = read_message(unpacked, id, user)
+
+                if kind not in payload_field:
+                    raise RuntimeError("Unexpected message kind", kind)
+
+                counts[kind] += 1
+
+                # Correlation ID used to match the response to this request
+                mid = str(uuid.uuid4())
+
+                req = json.dumps({
+                    "id": mid,
+                    "service": "knowledge",
+                    "request": {
+                        "operation": "put-kg-core",
+                        "user": user,
+                        "id": id,
+                        payload_field[kind]: payload,
+                    }
+                })
+
+                await ws.send(req)
+
+                # Wait for the response to this request
+                while True:
+
+                    resp = json.loads(await ws.recv())
+
+                    # A response to some other request: keep waiting
+                    if resp["id"] != mid:
+                        continue
+
+                    if "response" in resp:
+                        if "error" in resp["response"]:
+                            raise RuntimeError(resp["response"]["error"])
+
+                    break
+
+        # Summary of what was sent
+        t, ge = counts["t"], counts["ge"]
+        print(f"Put: {t} triple, {ge} GE messages.")
+
+        # Close explicitly before leaving the `async with` block
+        await ws.close()
+
+def main():
+    """Command-line entry point for tg-put-kg-core.
+
+    Reads the API URL, user, knowledge core ID and input file from the
+    command line and runs put() to completion.  Any exception is printed
+    rather than propagated.
+    """
+
+    cli = argparse.ArgumentParser(prog='tg-put-kg-core', description=__doc__)
+
+    cli.add_argument(
+        '-u', '--url', default=default_url,
+        help=f'API URL (default: {default_url})',
+    )
+
+    cli.add_argument(
+        '-U', '--user', default=default_user,
+        help=f'User ID (default: {default_user})',
+    )
+
+    # The parsed value is read back below as `opts.id`
+    cli.add_argument(
+        '--id', '--identifier', required=True,
+        help=f'Knowledge core ID',
+    )
+
+    cli.add_argument(
+        '-i', '--input', required=True,
+        help=f'Input file',
+    )
+
+    opts = cli.parse_args()
+
+    # Keyword arguments for put(), taken from the parsed options
+    put_args = {
+        "url": opts.url,
+        "user": opts.user,
+        "id": opts.id,
+        "input": opts.input,
+    }
+
+    try:
+        asyncio.run(put(**put_args))
+
+    except Exception as e:
+        print("Exception:", e, flush=True)
+
+main()
+
diff --git a/trustgraph-cli/scripts/tg-remove-library-document b/trustgraph-cli/scripts/tg-remove-library-document
new file mode 100755
index 00000000..74f7ef27
--- /dev/null
+++ b/trustgraph-cli/scripts/tg-remove-library-document
@@ -0,0 +1,59 @@
+#!/usr/bin/env python3
+
+"""
+Remove a document from the library
+"""
+
+import argparse
+import os
+import uuid
+
+from trustgraph.api import Api
+
+default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/')
+default_user = 'trustgraph'
+
+
+def remove_doc(url, user, id):
+    """Ask the library API at `url` to remove document `id` for `user`."""
+
+    library = Api(url).library()
+    library.remove_document(id=id, user=user)
+
+def main():
+    """Command-line entry point for tg-remove-library-document.
+
+    Takes the API URL, the user ID and the (mandatory) document ID from the
+    command line and calls remove_doc().  Any exception is printed rather
+    than propagated.
+    """
+
+    cli = argparse.ArgumentParser(
+        prog='tg-remove-library-document', description=__doc__,
+    )
+
+    cli.add_argument(
+        '-u', '--url', default=default_url,
+        help=f'API URL (default: {default_url})',
+    )
+
+    cli.add_argument(
+        '-U', '--user', default=default_user,
+        help=f'User ID (default: {default_user})',
+    )
+
+    cli.add_argument(
+        '--identifier', '--id', required=True,
+        help=f'Document ID',
+    )
+
+    opts = cli.parse_args()
+
+    try:
+        remove_doc(opts.url, opts.user, opts.identifier)
+
+    except Exception as e:
+        print("Exception:", e, flush=True)
+
+main()
+
diff --git a/trustgraph-cli/scripts/tg-save-doc-embeds b/trustgraph-cli/scripts/tg-save-doc-embeds
index 95f8b748..07dc5f26 100755
--- a/trustgraph-cli/scripts/tg-save-doc-embeds
+++ b/trustgraph-cli/scripts/tg-save-doc-embeds
@@ -2,11 +2,11 @@
"""
This utility connects to a running TrustGraph through the API and creates
-a knowledge core from the data streaming through the processing queues.
-For completeness of data, tg-save-kg-core should be initiated before data
-loading takes place. The default output format, msgpack should be used.
-JSON output format is also available - msgpack produces a more compact
-representation, which is also more performant to load.
+a document embeddings core from the data streaming through the processing
+queues. For completeness of data, tg-save-doc-embeds should be initiated
+before data loading takes place. The default output format, msgpack
+should be used. JSON output format is also available - msgpack produces
+a more compact representation, which is also more performant to load.
"""
import aiohttp
@@ -27,9 +27,7 @@ async def fetch_de(running, queue, user, collection, url):
async with aiohttp.ClientSession() as session:
- de_url = f"{url}stream/document-embeddings"
-
- async with session.ws_connect(de_url) as ws:
+ async with session.ws_connect(url) as ws:
while running.get():
@@ -117,11 +115,14 @@ async def run(running, **args):
q = asyncio.Queue()
+ url = args["url"]
+ flow_id = args["flow_id"]
+
de_task = asyncio.create_task(
fetch_de(
running=running,
queue=q, user=args["user"], collection=args["collection"],
- url=args["url"] + "api/v1/"
+ url = f"{url}api/v1/flow/{flow_id}/export/document-embeddings"
)
)
@@ -158,6 +159,12 @@ async def main(running):
help=f'TrustGraph API URL (default: {default_url})',
)
+ parser.add_argument(
+ '-f', '--flow-id',
+ default="0000",
+ help=f'Flow ID (default: 0000)'
+ )
+
parser.add_argument(
'-o', '--output-file',
# Make it mandatory, difficult to over-write an existing file
diff --git a/trustgraph-cli/scripts/tg-save-kg-core b/trustgraph-cli/scripts/tg-save-kg-core
deleted file mode 100755
index 298f2e84..00000000
--- a/trustgraph-cli/scripts/tg-save-kg-core
+++ /dev/null
@@ -1,250 +0,0 @@
-#!/usr/bin/env python3
-
-"""
-This utility connects to a running TrustGraph through the API and creates
-a knowledge core from the data streaming through the processing queues.
-For completeness of data, tg-save-kg-core should be initiated before data
-loading takes place. The default output format, msgpack should be used.
-JSON output format is also available - msgpack produces a more compact
-representation, which is also more performant to load.
-"""
-
-import aiohttp
-import asyncio
-import msgpack
-import json
-import sys
-import argparse
-import os
-import signal
-
-class Running:
- def __init__(self): self.running = True
- def get(self): return self.running
- def stop(self): self.running = False
-
-async def fetch_ge(running, queue, user, collection, url):
-
- async with aiohttp.ClientSession() as session:
-
- async with session.ws_connect(f"{url}stream/graph-embeddings") as ws:
-
- while running.get():
-
- try:
- msg = await asyncio.wait_for(ws.receive(), 1)
- except:
- continue
-
- if msg.type == aiohttp.WSMsgType.TEXT:
-
- data = msg.json()
-
- if user:
- if data["metadata"]["user"] != user:
- continue
-
- if collection:
- if data["metadata"]["collection"] != collection:
- continue
-
- await queue.put([
- "ge",
- {
- "m": {
- "i": data["metadata"]["id"],
- "m": data["metadata"]["metadata"],
- "u": data["metadata"]["user"],
- "c": data["metadata"]["collection"],
- },
- "e": [
- {
- "e": ent["entity"],
- "v": ent["vectors"],
- }
- for ent in data["entities"]
- ]
- }
- ])
- if msg.type == aiohttp.WSMsgType.ERROR:
- print("Error")
- break
-
-async def fetch_triples(running, queue, user, collection, url):
-
- async with aiohttp.ClientSession() as session:
-
- async with session.ws_connect(f"{url}stream/triples") as ws:
-
- while running.get():
-
- try:
- msg = await asyncio.wait_for(ws.receive(), 1)
- except:
- continue
-
- if msg.type == aiohttp.WSMsgType.TEXT:
-
- data = msg.json()
-
- if user:
- if data["metadata"]["user"] != user:
- continue
-
- if collection:
- if data["metadata"]["collection"] != collection:
- continue
-
- await queue.put((
- "t",
- {
- "m": {
- "i": data["metadata"]["id"],
- "m": data["metadata"]["metadata"],
- "u": data["metadata"]["user"],
- "c": data["metadata"]["collection"],
- },
- "t": data["triples"],
- }
- ))
- if msg.type == aiohttp.WSMsgType.ERROR:
- print("Error")
- break
-
-ge_counts = 0
-t_counts = 0
-
-async def stats(running):
-
- global t_counts
- global ge_counts
-
- while running.get():
-
- await asyncio.sleep(2)
-
- print(
- f"Graph embeddings: {ge_counts:10d} Triples: {t_counts:10d}"
- )
-
-async def output(running, queue, path, format):
-
- global t_counts
- global ge_counts
-
- with open(path, "wb") as f:
-
- while running.get():
-
- try:
- msg = await asyncio.wait_for(queue.get(), 0.5)
- except:
- # Hopefully it's TimeoutError. Annoying to match since
- # it changed in 3.11.
- continue
-
- if format == "msgpack":
- f.write(msgpack.packb(msg, use_bin_type=True))
- else:
- f.write(json.dumps(msg).encode("utf-8"))
-
- if msg[0] == "t":
- t_counts += 1
- else:
- if msg[0] == "ge":
- ge_counts += 1
-
- print("Output file closed")
-
-async def run(running, **args):
-
- q = asyncio.Queue()
-
- ge_task = asyncio.create_task(
- fetch_ge(
- running=running,
- queue=q, user=args["user"], collection=args["collection"],
- url=args["url"] + "api/v1/"
- )
- )
-
- triples_task = asyncio.create_task(
- fetch_triples(
- running=running, queue=q,
- user=args["user"], collection=args["collection"],
- url=args["url"] + "api/v1/"
- )
- )
-
- output_task = asyncio.create_task(
- output(
- running=running, queue=q,
- path=args["output_file"], format=args["format"],
- )
-
- )
-
- stats_task = asyncio.create_task(stats(running))
-
- await output_task
- await triples_task
- await ge_task
- await stats_task
-
- print("Exiting")
-
-async def main(running):
-
- parser = argparse.ArgumentParser(
- prog='tg-save-kg-core',
- description=__doc__,
- )
-
- default_url = os.getenv("TRUSTGRAPH_API", "http://localhost:8088/")
- default_user = "trustgraph"
- collection = "default"
-
- parser.add_argument(
- '-u', '--url',
- default=default_url,
- help=f'TrustGraph API URL (default: {default_url})',
- )
-
- parser.add_argument(
- '-o', '--output-file',
- # Make it mandatory, difficult to over-write an existing file
- required=True,
- help=f'Output file'
- )
-
- parser.add_argument(
- '--format',
- default="msgpack",
- choices=["msgpack", "json"],
- help=f'Output format (default: msgpack)',
- )
-
- parser.add_argument(
- '--user',
- help=f'User ID to filter on (default: no filter)'
- )
-
- parser.add_argument(
- '--collection',
- help=f'Collection ID to filter on (default: no filter)'
- )
-
- args = parser.parse_args()
-
- await run(running, **vars(args))
-
-running = Running()
-
-def interrupt(sig, frame):
- running.stop()
- print('Interrupt')
-
-signal.signal(signal.SIGINT, interrupt)
-
-asyncio.run(main(running))
-
diff --git a/trustgraph-cli/scripts/tg-set-prompt b/trustgraph-cli/scripts/tg-set-prompt
new file mode 100755
index 00000000..c19326e5
--- /dev/null
+++ b/trustgraph-cli/scripts/tg-set-prompt
@@ -0,0 +1,143 @@
+#!/usr/bin/env python3
+
+"""
+Sets a prompt template.
+"""
+
+import argparse
+import os
+from trustgraph.api import Api, ConfigKey, ConfigValue
+import json
+import tabulate
+import textwrap
+
+default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/')
+
+def set_system(url, system):
+
+ api = Api(url).config()
+
+ api.put([
+ ConfigValue(type="prompt", key="system", value=json.dumps(system))
+ ])
+
+ print("System prompt set.")
+
+def set_prompt(url, id, prompt, response, schema):
+
+ api = Api(url).config()
+
+ values = api.get([
+ ConfigKey(type="prompt", key="template-index")
+ ])
+
+ ix = json.loads(values[0].value)
+
+ object = {
+ "id": id,
+ "prompt": prompt,
+ }
+
+ if response:
+ object["response-type"] = response
+ else:
+ object["response-type"] = "text"
+
+ if schema:
+ object["schema"] = schema
+
+ if id not in ix:
+ ix.append(id)
+
+ values = api.put([
+ ConfigValue(
+ type="prompt", key="template-index", value=json.dumps(ix)
+ ),
+ ConfigValue(
+ type="prompt", key=f"template.{id}", value=json.dumps(object)
+ )
+ ])
+
+ print("Prompt set.")
+
+def main():
+
+ parser = argparse.ArgumentParser(
+ prog='tg-set-prompt',
+ description=__doc__,
+ )
+
+ parser.add_argument(
+ '-u', '--api-url',
+ default=default_url,
+ help=f'API URL (default: {default_url})',
+ )
+
+ parser.add_argument(
+ '--id',
+ help=f'Prompt ID',
+ )
+
+ parser.add_argument(
+ '--response',
+ help=f'Response form, should be one of: text json',
+ )
+
+ parser.add_argument(
+ '--schema',
+ help=f'JSON schema, for JSON response form',
+ )
+
+ parser.add_argument(
+ '--prompt',
+ help=f'Prompt template',
+ )
+
+ parser.add_argument(
+ '--system',
+ help=f'System prompt',
+ )
+
+ args = parser.parse_args()
+
+ try:
+
+ if args.system:
+ if args.id or args.prompt or args.schema or args.response:
+ raise RuntimeError("Can't use --system with other args")
+
+ set_system(
+ url=args.api_url, system=args.system
+ )
+
+ else:
+
+ if args.id is None:
+ raise RuntimeError("Must specify --id for prompt")
+
+ if args.prompt is None:
+ raise RuntimeError("Must specify --prompt for prompt")
+
+ if args.response:
+ if args.response not in ["text", "json"]:
+ raise RuntimeError("Response must be one of: text json")
+
+ if args.schema:
+ try:
+ schobj = json.loads(args.schema)
+ except:
+ raise RuntimeError("JSON schema must be valid JSON")
+ else:
+ schobj = None
+
+ set_prompt(
+ url=args.api_url, id=args.id, prompt=args.prompt,
+ response=args.response, schema=schobj
+ )
+
+ except Exception as e:
+
+ print("Exception:", e, flush=True)
+
+main()
+
diff --git a/trustgraph-cli/scripts/tg-set-token-costs b/trustgraph-cli/scripts/tg-set-token-costs
new file mode 100755
index 00000000..0c250fc2
--- /dev/null
+++ b/trustgraph-cli/scripts/tg-set-token-costs
@@ -0,0 +1,111 @@
+#!/usr/bin/env python3
+
+"""
+Sets a model's token costs.
+"""
+
+import argparse
+import os
+from trustgraph.api import Api, ConfigKey, ConfigValue
+import json
+import tabulate
+import textwrap
+
+default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/')
+
+def set_costs(api_url, model, input_costs, output_costs):
+
+ api = Api(api_url).config()
+
+ api.put([
+ ConfigValue(
+ type="token-costs", key=model,
+ value=json.dumps({
+ "input_price": input_costs / 1000000,
+ "output_price": output_costs / 1000000,
+ })
+ ),
+ ])
+
+def set_prompt(url, id, prompt, response, schema):
+
+ api = Api(url)
+
+ values = api.config_get([
+ ConfigKey(type="prompt", key="template-index")
+ ])
+
+ ix = json.loads(values[0].value)
+
+ object = {
+ "id": id,
+ "prompt": prompt,
+ }
+
+ if response:
+ object["response-type"] = response
+ else:
+ object["response-type"] = "text"
+
+ if schema:
+ object["schema"] = schema
+
+ if id not in ix:
+ ix.append(id)
+
+ values = api.config_put([
+ ConfigValue(
+ type="prompt", key="template-index", value=json.dumps(ix)
+ ),
+ ConfigValue(
+ type="prompt", key=f"template.{id}", value=json.dumps(object)
+ )
+ ])
+
+ print("Prompt set.")
+
+def main():
+    """Parse command-line arguments and store the token costs for a model."""
+    parser = argparse.ArgumentParser(
+        prog='tg-set-token-costs',
+        description=__doc__,
+    )
+
+    parser.add_argument(
+        '-u', '--api-url',
+        default=default_url,
+        help=f'API URL (default: {default_url})',
+    )
+
+    parser.add_argument(
+        '--model',
+        required=True,
+        help=f'Model ID',
+    )
+
+    parser.add_argument(
+        '-i', '--input-costs',
+        required=True,
+        type=float,
+        help=f'Input costs in $ per 1M tokens',
+    )
+
+    parser.add_argument(
+        '-o', '--output-costs',
+        required=True,
+        type=float,
+        help=f'Output costs in $ per 1M tokens',
+    )
+
+    args = parser.parse_args()
+
+    try:
+        # Argument dests (api_url, model, input_costs, output_costs) match
+        set_costs(**vars(args))
+
+    except Exception as e:
+
+        print("Exception:", e, flush=True)
+
+main()
+
diff --git a/trustgraph-cli/scripts/tg-show-config b/trustgraph-cli/scripts/tg-show-config
new file mode 100755
index 00000000..efbd34a0
--- /dev/null
+++ b/trustgraph-cli/scripts/tg-show-config
@@ -0,0 +1,49 @@
+#!/usr/bin/env python3
+
+"""
+Dumps out the current configuration
+"""
+
+import argparse
+import os
+from trustgraph.api import Api
+import json
+
+default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/')
+
+def show_config(url):
+
+ api = Api(url).config()
+
+ config, version = api.all()
+
+ print("Version:", version)
+ print(json.dumps(config, indent=4))
+
+def main():
+
+ parser = argparse.ArgumentParser(
+ prog='tg-show-config',
+ description=__doc__,
+ )
+
+ parser.add_argument(
+ '-u', '--api-url',
+ default=default_url,
+ help=f'API URL (default: {default_url})',
+ )
+
+ args = parser.parse_args()
+
+ try:
+
+ show_config(
+ url=args.api_url,
+ )
+
+ except Exception as e:
+
+ print("Exception:", e, flush=True)
+
+main()
+
diff --git a/trustgraph-cli/scripts/tg-show-flow-classes b/trustgraph-cli/scripts/tg-show-flow-classes
new file mode 100755
index 00000000..f0d2c510
--- /dev/null
+++ b/trustgraph-cli/scripts/tg-show-flow-classes
@@ -0,0 +1,69 @@
+#!/usr/bin/env python3
+
+"""
+Shows all defined flow classes.
+"""
+
+import argparse
+import os
+import tabulate
+from trustgraph.api import Api
+import json
+
+default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/')
+
+def show_flow_classes(url):
+    """Print a table of every defined flow class (name, description, tags)."""
+    api = Api(url).flow()
+
+    class_names = api.list_classes()
+
+    if len(class_names) == 0:
+        print("No flow classes.")
+        return
+
+    classes = []
+
+    for class_name in class_names:
+        cls = api.get_class(class_name)
+        classes.append((
+            class_name,
+            cls.get("description", ""),
+            ", ".join(cls.get("tags", [])),
+        ))
+
+    print(tabulate.tabulate(
+        classes,
+        tablefmt="pretty",
+        maxcolwidths=[None, 40, 20],
+        stralign="left",
+        headers = ["flow class", "description", "tags"],
+    ))
+
+def main():
+
+ parser = argparse.ArgumentParser(
+ prog='tg-show-flow-classes',
+ description=__doc__,
+ )
+
+ parser.add_argument(
+ '-u', '--api-url',
+ default=default_url,
+ help=f'API URL (default: {default_url})',
+ )
+
+ args = parser.parse_args()
+
+ try:
+
+ show_flow_classes(
+ url=args.api_url,
+ )
+
+ except Exception as e:
+
+ print("Exception:", e, flush=True)
+
+main()
+
diff --git a/trustgraph-cli/scripts/tg-show-flow-state b/trustgraph-cli/scripts/tg-show-flow-state
new file mode 100755
index 00000000..7b8b1a42
--- /dev/null
+++ b/trustgraph-cli/scripts/tg-show-flow-state
@@ -0,0 +1,93 @@
+#!/usr/bin/env python3
+
+"""
+Dump out a flow's processor states
+"""
+
+import requests
+import argparse
+from trustgraph.api import Api
+import os
+
+default_metrics_url = "http://localhost:8088/api/metrics"
+default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/')
+
+def dump_status(metrics_url, api_url, flow_id):
+
+ api = Api(api_url).flow()
+
+ flow = api.get(flow_id)
+ class_name = flow["class-name"]
+
+ print()
+ print(f"Flow {flow_id}")
+ show_processors(metrics_url, flow_id)
+
+ print()
+ print(f"Class {class_name}")
+ show_processors(metrics_url, class_name)
+
+ print()
+
+def show_processors(metrics_url, flow_label):
+
+ url = f"{metrics_url}/query"
+
+ expr = f"consumer_state=\"running\",flow=\"{flow_label}\""
+
+ params = {
+ "query": "consumer_state{" + expr + "}"
+ }
+
+ resp = requests.get(url, params=params)
+
+ obj = resp.json()
+
+ tbl = [
+ [
+ m["metric"]["job"],
+ "\U0001f49a" if int(m["value"][1]) > 0 else "\U0000274c"
+ ]
+ for m in obj["data"]["result"]
+ ]
+
+ for row in tbl:
+ print(f"- {row[0]:30} {row[1]}")
+
+def main():
+
+ parser = argparse.ArgumentParser(
+ prog='tg-show-flow-state',
+ description=__doc__,
+ )
+
+ parser.add_argument(
+ '-f', '--flow-id',
+ default="0000",
+ help=f'Flow ID (default: 0000)'
+ )
+
+ parser.add_argument(
+ '-u', '--api-url',
+ default=default_url,
+ help=f'API URL (default: {default_url})',
+ )
+
+ parser.add_argument(
+ '-m', '--metrics-url',
+ default=default_metrics_url,
+ help=f'Metrics URL (default: {default_metrics_url})',
+ )
+
+ args = parser.parse_args()
+
+ try:
+
+ dump_status(args.metrics_url, args.api_url, args.flow_id)
+
+ except Exception as e:
+
+ print("Exception:", e, flush=True)
+
+main()
+
diff --git a/trustgraph-cli/scripts/tg-show-flows b/trustgraph-cli/scripts/tg-show-flows
new file mode 100755
index 00000000..edc55516
--- /dev/null
+++ b/trustgraph-cli/scripts/tg-show-flows
@@ -0,0 +1,114 @@
+#!/usr/bin/env python3
+
+"""
+Shows configured flows.
+"""
+
+import argparse
+import os
+import tabulate
+from trustgraph.api import Api, ConfigKey
+import json
+
+default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/')
+
+def get_interface(config_api, i):
+
+ key = ConfigKey("interface-descriptions", i)
+
+ value = config_api.get([key])[0].value
+
+ return json.loads(value)
+
+def describe_interfaces(intdefs, flow):
+    """Return one line per queue of the flow's visible interfaces."""
+    intfs = flow.get("interfaces", {})
+
+    lst = []
+
+    for k, v in intdefs.items():
+
+        if v.get("visible", False):
+
+            # The interface kind decides how its queues are laid out.
+            kind = v.get("kind", None)
+
+            if kind == "request-response":
+                req = intfs[k]["request"]
+                resp = intfs[k]["response"]
+
+                lst.append(f"{k} request: {req}")
+                lst.append(f"{k} response: {resp}")
+
+            if kind == "send":
+                q = intfs[k]
+
+                lst.append(f"{k}: {q}")
+
+    return "\n".join(lst)
+
+def show_flows(url):
+
+ api = Api(url)
+ config_api = api.config()
+ flow_api = api.flow()
+
+ interface_names = config_api.list("interface-descriptions")
+
+ interface_defs = {
+ i: get_interface(config_api, i)
+ for i in interface_names
+ }
+
+ flow_ids = flow_api.list()
+
+ if len(flow_ids) == 0:
+ print("No flows.")
+ return
+
+ flows = []
+
+ for id in flow_ids:
+
+ flow = flow_api.get(id)
+
+ table = []
+ table.append(("id", id))
+ table.append(("class", flow.get("class-name", "")))
+ table.append(("desc", flow.get("description", "")))
+ table.append(("queue", describe_interfaces(interface_defs, flow)))
+
+ print(tabulate.tabulate(
+ table,
+ tablefmt="pretty",
+ stralign="left",
+ ))
+ print()
+
+def main():
+
+ parser = argparse.ArgumentParser(
+ prog='tg-show-flows',
+ description=__doc__,
+ )
+
+ parser.add_argument(
+ '-u', '--api-url',
+ default=default_url,
+ help=f'API URL (default: {default_url})',
+ )
+
+ args = parser.parse_args()
+
+ try:
+
+ show_flows(
+ url=args.api_url,
+ )
+
+ except Exception as e:
+
+ print("Exception:", e, flush=True)
+
+main()
+
diff --git a/trustgraph-cli/scripts/tg-graph-show b/trustgraph-cli/scripts/tg-show-graph
similarity index 72%
rename from trustgraph-cli/scripts/tg-graph-show
rename to trustgraph-cli/scripts/tg-show-graph
index a3d10283..3690a8b8 100755
--- a/trustgraph-cli/scripts/tg-graph-show
+++ b/trustgraph-cli/scripts/tg-show-graph
@@ -12,12 +12,12 @@ default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/')
default_user = 'trustgraph'
default_collection = 'default'
-def show_graph(url, user, collection):
+def show_graph(url, flow_id, user, collection):
- api = Api(url)
+ api = Api(url).flow().id(flow_id)
rows = api.triples_query(
-# user=user, collection=collection,
+ user=user, collection=collection,
s=None, p=None, o=None, limit=10_000,
)
@@ -27,7 +27,7 @@ def show_graph(url, user, collection):
def main():
parser = argparse.ArgumentParser(
- prog='tg-graph-show',
+ prog='tg-show-graph',
description=__doc__,
)
@@ -37,6 +37,12 @@ def main():
help=f'API URL (default: {default_url})',
)
+ parser.add_argument(
+ '-f', '--flow-id',
+ default="0000",
+ help=f'Flow ID (default: 0000)'
+ )
+
parser.add_argument(
'-U', '--user',
default=default_user,
@@ -54,9 +60,10 @@ def main():
try:
show_graph(
- url=args.api_url,
- user=args.user,
- collection=args.collection,
+ url = args.api_url,
+ flow_id = args.flow_id,
+ user = args.user,
+ collection = args.collection,
)
except Exception as e:
diff --git a/trustgraph-cli/scripts/tg-show-kg-cores b/trustgraph-cli/scripts/tg-show-kg-cores
new file mode 100755
index 00000000..cd908485
--- /dev/null
+++ b/trustgraph-cli/scripts/tg-show-kg-cores
@@ -0,0 +1,59 @@
+#!/usr/bin/env python3
+
+"""
+Shows knowledge cores
+"""
+
+import argparse
+import os
+import tabulate
+from trustgraph.api import Api, ConfigKey
+import json
+
+default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/')
+
+def show_cores(url, user):
+
+ api = Api(url).knowledge()
+
+ ids = api.list_kg_cores()
+
+ if len(ids) == 0:
+ print("No knowledge cores.")
+
+ for id in ids:
+ print(id)
+
+def main():
+    """Parse command-line arguments and list the knowledge cores."""
+    parser = argparse.ArgumentParser(
+        prog='tg-show-kg-cores',
+        description=__doc__,
+    )
+
+    parser.add_argument(
+        '-u', '--api-url',
+        default=default_url,
+        help=f'API URL (default: {default_url})',
+    )
+
+    parser.add_argument(
+        '-U', '--user',
+        default="trustgraph",
+        help='User ID (default: trustgraph)',
+    )
+
+    args = parser.parse_args()
+
+    try:
+
+        show_cores(
+            url=args.api_url, user=args.user
+        )
+
+    except Exception as e:
+
+        print("Exception:", e, flush=True)
+
+main()
+
diff --git a/trustgraph-cli/scripts/tg-show-library-documents b/trustgraph-cli/scripts/tg-show-library-documents
new file mode 100755
index 00000000..47062efc
--- /dev/null
+++ b/trustgraph-cli/scripts/tg-show-library-documents
@@ -0,0 +1,76 @@
+#!/usr/bin/env python3
+
+"""
+Shows all loaded library documents
+"""
+
+import argparse
+import os
+import tabulate
+from trustgraph.api import Api, ConfigKey
+import json
+
+default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/')
+default_user = "trustgraph"
+
+def show_docs(url, user):
+
+ api = Api(url).library()
+
+ docs = api.get_documents(user=user)
+
+ if len(docs) == 0:
+ print("No documents.")
+ return
+
+ for doc in docs:
+
+ table = []
+ table.append(("id", doc.id))
+ table.append(("time", doc.time))
+ table.append(("title", doc.title))
+ table.append(("kind", doc.kind))
+ table.append(("note", doc.comments))
+ table.append(("tags", ", ".join(doc.tags)))
+
+ print(tabulate.tabulate(
+ table,
+ tablefmt="pretty",
+ stralign="left",
+ maxcolwidths=[None, 67],
+ ))
+ print()
+
+def main():
+
+ parser = argparse.ArgumentParser(
+ prog='tg-show-library-documents',
+ description=__doc__,
+ )
+
+ parser.add_argument(
+ '-u', '--api-url',
+ default=default_url,
+ help=f'API URL (default: {default_url})',
+ )
+
+ parser.add_argument(
+ '-U', '--user',
+ default=default_user,
+ help=f'User ID (default: {default_user})'
+ )
+
+ args = parser.parse_args()
+
+ try:
+
+ show_docs(
+ url = args.api_url, user = args.user
+ )
+
+ except Exception as e:
+
+ print("Exception:", e, flush=True)
+
+main()
+
diff --git a/trustgraph-cli/scripts/tg-show-library-processing b/trustgraph-cli/scripts/tg-show-library-processing
new file mode 100755
index 00000000..9390afe2
--- /dev/null
+++ b/trustgraph-cli/scripts/tg-show-library-processing
@@ -0,0 +1,75 @@
+#!/usr/bin/env python3
+
+"""
+"""
+
+import argparse
+import os
+import tabulate
+from trustgraph.api import Api, ConfigKey
+import json
+
+default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/')
+default_user = "trustgraph"
+
+def show_procs(url, user):
+
+ api = Api(url).library()
+
+ procs = api.get_processings(user = user)
+
+ if len(procs) == 0:
+ print("No processing objects.")
+ return
+
+ for proc in procs:
+
+ table = []
+ table.append(("id", proc.id))
+ table.append(("document-id", proc.document_id))
+ table.append(("time", proc.time))
+ table.append(("flow", proc.flow))
+ table.append(("collection", proc.collection))
+ table.append(("tags", ", ".join(proc.tags)))
+
+ print(tabulate.tabulate(
+ table,
+ tablefmt="pretty",
+ stralign="left",
+ maxcolwidths=[None, 50],
+ ))
+ print()
+
+def main():
+
+ parser = argparse.ArgumentParser(
+ prog='tg-show-library-processing',
+ description=__doc__,
+ )
+
+ parser.add_argument(
+ '-u', '--api-url',
+ default=default_url,
+ help=f'API URL (default: {default_url})',
+ )
+
+ parser.add_argument(
+ '-U', '--user',
+ default=default_user,
+ help=f'User ID (default: {default_user})'
+ )
+
+ args = parser.parse_args()
+
+ try:
+
+ show_procs(
+ url = args.api_url, user = args.user
+ )
+
+ except Exception as e:
+
+ print("Exception:", e, flush=True)
+
+main()
+
diff --git a/trustgraph-cli/scripts/tg-show-processor-state b/trustgraph-cli/scripts/tg-show-processor-state
new file mode 100755
index 00000000..e66b1cc2
--- /dev/null
+++ b/trustgraph-cli/scripts/tg-show-processor-state
@@ -0,0 +1,55 @@
+#!/usr/bin/env python3
+
+"""
+Dump out TrustGraph processor states.
+"""
+
+import requests
+import argparse
+
+default_metrics_url = "http://localhost:8088/api/metrics"
+
+def dump_status(url):
+
+ url = f"{url}/query?query=processor_info"
+
+ resp = requests.get(url)
+
+ obj = resp.json()
+
+ tbl = [
+ [
+ m["metric"]["job"],
+ "\U0001f49a"
+ ]
+ for m in obj["data"]["result"]
+ ]
+
+ for row in tbl:
+ print(f" {row[0]:30} {row[1]}")
+
+def main():
+
+ parser = argparse.ArgumentParser(
+ prog='tg-show-processor-state',
+ description=__doc__,
+ )
+
+ parser.add_argument(
+ '-m', '--metrics-url',
+ default=default_metrics_url,
+ help=f'Metrics URL (default: {default_metrics_url})',
+ )
+
+ args = parser.parse_args()
+
+ try:
+
+ dump_status(args.metrics_url)
+
+ except Exception as e:
+
+ print("Exception:", e, flush=True)
+
+main()
+
diff --git a/trustgraph-cli/scripts/tg-show-prompts b/trustgraph-cli/scripts/tg-show-prompts
new file mode 100755
index 00000000..98a8445e
--- /dev/null
+++ b/trustgraph-cli/scripts/tg-show-prompts
@@ -0,0 +1,96 @@
+#!/usr/bin/env python3
+
+"""
+Dumps out the current prompts
+"""
+
+import argparse
+import os
+from trustgraph.api import Api, ConfigKey
+import json
+import tabulate
+import textwrap
+
+default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/')
+
+def show_config(url):
+
+ api = Api(url).config()
+
+ values = api.get([
+ ConfigKey(type="prompt", key="system"),
+ ConfigKey(type="prompt", key="template-index")
+ ])
+
+ system = json.loads(values[0].value)
+ ix = json.loads(values[1].value)
+
+ values = api.get([
+ ConfigKey(type="prompt", key=f"template.{v}")
+ for v in ix
+ ])
+
+ print()
+
+ print("System prompt:")
+
+ print(tabulate.tabulate(
+ [["prompt", system]],
+ tablefmt="pretty",
+ maxcolwidths=[None, 70],
+ stralign="left"
+ ))
+
+ for n, key in enumerate(ix):
+
+ data = json.loads(values[n].value)
+
+ table = []
+
+ table.append(("prompt", data["prompt"]))
+
+ if "response-type" in data:
+ table.append(("response", data["response-type"]))
+
+ if "schema" in data:
+ table.append(("schema", data["schema"]))
+
+ print()
+ print(key + ":")
+
+ print(tabulate.tabulate(
+ table,
+ tablefmt="pretty",
+ maxcolwidths=[None, 70],
+ stralign="left"
+ ))
+
+ print()
+
+def main():
+
+ parser = argparse.ArgumentParser(
+ prog='tg-show-prompts',
+ description=__doc__,
+ )
+
+ parser.add_argument(
+ '-u', '--api-url',
+ default=default_url,
+ help=f'API URL (default: {default_url})',
+ )
+
+ args = parser.parse_args()
+
+ try:
+
+ show_config(
+ url=args.api_url,
+ )
+
+ except Exception as e:
+
+ print("Exception:", e, flush=True)
+
+main()
+
diff --git a/trustgraph-cli/scripts/tg-show-token-costs b/trustgraph-cli/scripts/tg-show-token-costs
new file mode 100755
index 00000000..1ebad213
--- /dev/null
+++ b/trustgraph-cli/scripts/tg-show-token-costs
@@ -0,0 +1,79 @@
+#!/usr/bin/env python3
+
+"""
+Dumps out token cost configuration
+"""
+
+import argparse
+import os
+from trustgraph.api import Api, ConfigKey
+import json
+import tabulate
+import textwrap
+
+tabulate.PRESERVE_WHITESPACE = True
+
+default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/')
+
+def show_config(url):
+    """Print a table of per-model token costs in $ per million tokens."""
+    api = Api(url).config()
+
+    models = api.list("token-costs")
+
+    costs = []
+
+    def fmt(x):
+        return "{price:.3f}".format(price = 1000000 * x)
+
+    for model in models:
+        # A missing or malformed entry is shown as "-" rather than aborting
+        try:
+            values = json.loads(api.get([
+                ConfigKey(type="token-costs", key=model),
+            ])[0].value)
+            costs.append((
+                model,
+                fmt(values.get("input_price")),
+                fmt(values.get("output_price")),
+            ))
+        except Exception:
+            costs.append((
+                model, "-", "-"
+            ))
+
+    print(tabulate.tabulate(
+        costs,
+        tablefmt = "pretty",
+        headers = ["model", "input, $/Mt", "output, $/Mt"],
+        colalign = ["left", "right", "right"],
+#        stralign = ["left", "decimal", "decimal"]
+    ))
+
+def main():
+
+ parser = argparse.ArgumentParser(
+ prog='tg-show-token-costs',
+ description=__doc__,
+ )
+
+ parser.add_argument(
+ '-u', '--api-url',
+ default=default_url,
+ help=f'API URL (default: {default_url})',
+ )
+
+ args = parser.parse_args()
+
+ try:
+
+ show_config(
+ url=args.api_url,
+ )
+
+ except Exception as e:
+
+ print("Exception:", e, flush=True)
+
+main()
+
diff --git a/trustgraph-cli/scripts/tg-show-tools b/trustgraph-cli/scripts/tg-show-tools
new file mode 100755
index 00000000..b6c4a8e4
--- /dev/null
+++ b/trustgraph-cli/scripts/tg-show-tools
@@ -0,0 +1,86 @@
+#!/usr/bin/env python3
+
+"""
+Dumps out the current agent tool configuration
+"""
+
+import argparse
+import os
+from trustgraph.api import Api, ConfigKey
+import json
+import tabulate
+import textwrap
+
+default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/')
+
+def show_config(url):
+
+ api = Api(url).config()
+
+ values = api.get([
+ ConfigKey(type="agent", key="tool-index")
+ ])
+
+ ix = json.loads(values[0].value)
+
+ values = api.get([
+ ConfigKey(type="agent", key=f"tool.{v}")
+ for v in ix
+ ])
+
+ for n, key in enumerate(ix):
+
+ data = json.loads(values[n].value)
+
+ table = []
+
+ table.append(("id", data["id"]))
+ table.append(("name", data["name"]))
+ table.append(("description", data["description"]))
+
+ for n, arg in enumerate(data["arguments"]):
+ table.append((
+ f"arg {n}",
+ f"{arg['name']}: {arg['type']}\n{arg['description']}"
+ ))
+
+
+ print()
+ print(key + ":")
+
+ print(tabulate.tabulate(
+ table,
+ tablefmt="pretty",
+ maxcolwidths=[None, 70],
+ stralign="left"
+ ))
+
+ print()
+
+def main():
+
+ parser = argparse.ArgumentParser(
+ prog='tg-show-tools',
+ description=__doc__,
+ )
+
+ parser.add_argument(
+ '-u', '--api-url',
+ default=default_url,
+ help=f'API URL (default: {default_url})',
+ )
+
+ args = parser.parse_args()
+
+ try:
+
+ show_config(
+ url=args.api_url,
+ )
+
+ except Exception as e:
+
+ print("Exception:", e, flush=True)
+
+main()
+
diff --git a/trustgraph-cli/scripts/tg-start-flow b/trustgraph-cli/scripts/tg-start-flow
new file mode 100755
index 00000000..beb5de7e
--- /dev/null
+++ b/trustgraph-cli/scripts/tg-start-flow
@@ -0,0 +1,72 @@
+#!/usr/bin/env python3
+
+"""
+Starts a processing flow using a defined flow class
+"""
+
+import argparse
+import os
+import tabulate
+from trustgraph.api import Api
+import json
+
+default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/')
+
+def start_flow(url, class_name, flow_id, description):
+
+ api = Api(url).flow()
+
+ api.start(
+ class_name = class_name,
+ id = flow_id,
+ description = description,
+ )
+
+def main():
+
+ parser = argparse.ArgumentParser(
+ prog='tg-start-flow',
+ description=__doc__,
+ )
+
+ parser.add_argument(
+ '-u', '--api-url',
+ default=default_url,
+ help=f'API URL (default: {default_url})',
+ )
+
+ parser.add_argument(
+ '-n', '--class-name',
+ required=True,
+ help=f'Flow class name',
+ )
+
+ parser.add_argument(
+ '-i', '--flow-id',
+ required=True,
+ help=f'Flow ID',
+ )
+
+ parser.add_argument(
+ '-d', '--description',
+ required=True,
+ help=f'Flow description',
+ )
+
+ args = parser.parse_args()
+
+ try:
+
+ start_flow(
+ url = args.api_url,
+ class_name = args.class_name,
+ flow_id = args.flow_id,
+ description = args.description,
+ )
+
+ except Exception as e:
+
+ print("Exception:", e, flush=True)
+
+main()
+
diff --git a/trustgraph-cli/scripts/tg-start-library-processing b/trustgraph-cli/scripts/tg-start-library-processing
new file mode 100755
index 00000000..b03ae08d
--- /dev/null
+++ b/trustgraph-cli/scripts/tg-start-library-processing
@@ -0,0 +1,103 @@
+#!/usr/bin/env python3
+
+"""
+Submits a library document for processing
+"""
+
+import argparse
+import os
+import tabulate
+from trustgraph.api import Api, ConfigKey
+import json
+
+default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/')
+default_user = "trustgraph"
+
+def start_processing(
+ url, user, document_id, id, flow, collection, tags
+):
+
+ api = Api(url).library()
+
+ if tags:
+ tags = tags.split(",")
+ else:
+ tags = []
+
+ api.start_processing(
+ id = id,
+ document_id = document_id,
+ flow = flow,
+ user = user,
+ collection = collection,
+ tags = tags
+ )
+
+def main():
+    """Parse command-line arguments and submit a document for processing."""
+    parser = argparse.ArgumentParser(
+        prog='tg-start-library-processing',
+        description=__doc__,
+    )
+
+    parser.add_argument(
+        '-u', '--api-url',
+        default=default_url,
+        help=f'API URL (default: {default_url})',
+    )
+
+    parser.add_argument(
+        '-U', '--user',
+        default=default_user,
+        help=f'User ID (default: {default_user})'
+    )
+
+    parser.add_argument(
+        '-i', '--flow-id',
+        default="0000",
+        help=f'Flow ID (default: 0000)',
+    )
+
+    parser.add_argument(
+        '-d', '--document-id',
+        required=True,
+        help=f'Document ID',
+    )
+
+    parser.add_argument(
+        '--id', '--processing-id',
+        required=True,
+        help=f'Processing ID',
+    )
+
+    parser.add_argument(
+        '--collection',
+        default='default',
+        help=f'Collection (default: default)'
+    )
+
+    parser.add_argument(
+        '--tags',
+        help=f'Tags, comma separated'
+    )
+
+    args = parser.parse_args()
+
+    try:
+
+        start_processing(
+            url = args.api_url,
+            user = args.user,
+            document_id = args.document_id,
+            id = args.id,
+            flow = args.flow_id,
+            collection = args.collection,
+            tags = args.tags
+        )
+
+    except Exception as e:
+
+        print("Exception:", e, flush=True)
+
+main()
+
diff --git a/trustgraph-cli/scripts/tg-stop-flow b/trustgraph-cli/scripts/tg-stop-flow
new file mode 100755
index 00000000..e92f611c
--- /dev/null
+++ b/trustgraph-cli/scripts/tg-stop-flow
@@ -0,0 +1,54 @@
+#!/usr/bin/env python3
+
+"""
+Stops a processing flow.
+"""
+
+import argparse
+import os
+import tabulate
+from trustgraph.api import Api
+import json
+
+default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/')
+
+def stop_flow(url, flow_id):
+
+ api = Api(url).flow()
+
+ api.stop(id = flow_id)
+
+def main():
+
+ parser = argparse.ArgumentParser(
+ prog='tg-stop-flow',
+ description=__doc__,
+ )
+
+ parser.add_argument(
+ '-u', '--api-url',
+ default=default_url,
+ help=f'API URL (default: {default_url})',
+ )
+
+ parser.add_argument(
+ '-i', '--flow-id',
+ required=True,
+ help=f'Flow ID',
+ )
+
+ args = parser.parse_args()
+
+ try:
+
+ stop_flow(
+ url=args.api_url,
+ flow_id=args.flow_id,
+ )
+
+ except Exception as e:
+
+ print("Exception:", e, flush=True)
+
+main()
+
diff --git a/trustgraph-cli/scripts/tg-stop-library-processing b/trustgraph-cli/scripts/tg-stop-library-processing
new file mode 100755
index 00000000..bb041b05
--- /dev/null
+++ b/trustgraph-cli/scripts/tg-stop-library-processing
@@ -0,0 +1,65 @@
+#!/usr/bin/env python3
+
+"""
+Removes a library document processing record. This is just a record of
+processing, it doesn't stop in-flight processing at the moment.
+"""
+
+import argparse
+import os
+import tabulate
+from trustgraph.api import Api, ConfigKey
+import json
+
+default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/')
+default_user = "trustgraph"
+
+def stop_processing(
+ url, user, id
+):
+
+ api = Api(url).library()
+
+ api.stop_processing(user = user, id = id)
+
+def main():
+
+ parser = argparse.ArgumentParser(
+ prog='tg-stop-library-processing',
+ description=__doc__,
+ )
+
+ parser.add_argument(
+ '-u', '--api-url',
+ default=default_url,
+ help=f'API URL (default: {default_url})',
+ )
+
+ parser.add_argument(
+ '-U', '--user',
+ default=default_user,
+ help=f'User ID (default: {default_user})'
+ )
+
+ parser.add_argument(
+ '--id', '--processing-id',
+ required=True,
+ help=f'Processing ID',
+ )
+
+ args = parser.parse_args()
+
+ try:
+
+ stop_processing(
+ url = args.api_url,
+ user = args.user,
+ id = args.id,
+ )
+
+ except Exception as e:
+
+ print("Exception:", e, flush=True)
+
+main()
+
diff --git a/trustgraph-cli/scripts/tg-unload-kg-core b/trustgraph-cli/scripts/tg-unload-kg-core
new file mode 100755
index 00000000..7227942d
--- /dev/null
+++ b/trustgraph-cli/scripts/tg-unload-kg-core
@@ -0,0 +1,72 @@
+#!/usr/bin/env python3
+
+"""
+Starts an unload operation on a knowledge core which was previously
+loaded into a flow, e.g. with tg-load-kg-core.  The core is identified
+by its ID and the ID of the flow.
+"""
+
+import argparse
+import os
+import tabulate
+from trustgraph.api import Api
+import json
+
+default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/')
+default_flow = "0000"
+default_collection = "default"
+
+def unload_kg_core(url, user, id, flow):
+
+ api = Api(url).knowledge()
+
+ class_names = api.unload_kg_core(user = user, id = id, flow=flow)
+
+def main():
+    """Parse command-line arguments and unload the requested knowledge core."""
+    parser = argparse.ArgumentParser(
+        prog='tg-unload-kg-core',
+        description=__doc__,
+    )
+
+    parser.add_argument(
+        '-u', '--api-url',
+        default=default_url,
+        help=f'API URL (default: {default_url})',
+    )
+
+    parser.add_argument(
+        '-U', '--user',
+        default="trustgraph",
+        help='User ID (default: trustgraph)',
+    )
+
+    parser.add_argument(
+        '--id', '--identifier',
+        required=True,
+        help=f'Knowledge core ID',
+    )
+
+    parser.add_argument(
+        '-f', '--flow-id',
+        default=default_flow,
+        help=f'Flow ID (default: {default_flow})',
+    )
+
+    args = parser.parse_args()
+
+    try:
+
+        unload_kg_core(
+            url=args.api_url,
+            user=args.user,
+            id=args.id,
+            flow=args.flow_id,
+        )
+
+    except Exception as e:
+
+        print("Exception:", e, flush=True)
+
+main()
+
diff --git a/trustgraph-cli/setup.py b/trustgraph-cli/setup.py
index 822ab765..b555eb92 100644
--- a/trustgraph-cli/setup.py
+++ b/trustgraph-cli/setup.py
@@ -34,7 +34,7 @@ setuptools.setup(
python_requires='>=3.8',
download_url = "https://github.com/trustgraph-ai/trustgraph/archive/refs/tags/v" + version + ".tar.gz",
install_requires=[
- "trustgraph-base>=0.21,<0.22",
+ "trustgraph-base>=0.23,<0.24",
"requests",
"pulsar-client",
"aiohttp",
@@ -44,23 +44,47 @@ setuptools.setup(
"websockets",
],
scripts=[
+ "scripts/tg-add-library-document",
+ "scripts/tg-delete-flow-class",
+ "scripts/tg-delete-kg-core",
"scripts/tg-dump-msgpack",
- "scripts/tg-graph-show",
+ "scripts/tg-get-flow-class",
+ "scripts/tg-get-kg-core",
"scripts/tg-graph-to-turtle",
- "scripts/tg-init-pulsar",
- "scripts/tg-init-pulsar-manager",
+ "scripts/tg-init-trustgraph",
"scripts/tg-invoke-agent",
"scripts/tg-invoke-document-rag",
"scripts/tg-invoke-graph-rag",
"scripts/tg-invoke-llm",
"scripts/tg-invoke-prompt",
- "scripts/tg-load-kg-core",
"scripts/tg-load-doc-embeds",
+ "scripts/tg-load-kg-core",
"scripts/tg-load-pdf",
+ "scripts/tg-load-sample-documents",
"scripts/tg-load-text",
"scripts/tg-load-turtle",
- "scripts/tg-processor-state",
- "scripts/tg-save-kg-core",
+ "scripts/tg-put-flow-class",
+ "scripts/tg-put-kg-core",
+ "scripts/tg-remove-library-document",
"scripts/tg-save-doc-embeds",
+ "scripts/tg-set-prompt",
+ "scripts/tg-set-token-costs",
+ "scripts/tg-show-config",
+ "scripts/tg-show-flow-classes",
+ "scripts/tg-show-flow-state",
+ "scripts/tg-show-flows",
+ "scripts/tg-show-graph",
+ "scripts/tg-show-kg-cores",
+ "scripts/tg-show-library-documents",
+ "scripts/tg-show-library-processing",
+ "scripts/tg-show-processor-state",
+ "scripts/tg-show-prompts",
+ "scripts/tg-show-token-costs",
+ "scripts/tg-show-tools",
+ "scripts/tg-start-flow",
+ "scripts/tg-unload-kg-core",
+ "scripts/tg-start-library-processing",
+ "scripts/tg-stop-flow",
+ "scripts/tg-stop-library-processing",
]
)
diff --git a/trustgraph-embeddings-hf/setup.py b/trustgraph-embeddings-hf/setup.py
index 8cf5beb4..a01a9f11 100644
--- a/trustgraph-embeddings-hf/setup.py
+++ b/trustgraph-embeddings-hf/setup.py
@@ -34,8 +34,8 @@ setuptools.setup(
python_requires='>=3.8',
download_url = "https://github.com/trustgraph-ai/trustgraph/archive/refs/tags/v" + version + ".tar.gz",
install_requires=[
- "trustgraph-base>=0.21,<0.22",
- "trustgraph-flow>=0.21,<0.22",
+ "trustgraph-base>=0.23,<0.24",
+ "trustgraph-flow>=0.23,<0.24",
"torch",
"urllib3",
"transformers",
diff --git a/trustgraph-embeddings-hf/trustgraph/embeddings/hf/hf.py b/trustgraph-embeddings-hf/trustgraph/embeddings/hf/hf.py
index 2e44821e..0ab3cef9 100755
--- a/trustgraph-embeddings-hf/trustgraph/embeddings/hf/hf.py
+++ b/trustgraph-embeddings-hf/trustgraph/embeddings/hf/hf.py
@@ -4,89 +4,37 @@ Embeddings service, applies an embeddings model selected from HuggingFace.
Input is text, output is embeddings vector.
"""
+from ... base import EmbeddingsService
+
from langchain_huggingface import HuggingFaceEmbeddings
-from trustgraph.schema import EmbeddingsRequest, EmbeddingsResponse, Error
-from trustgraph.schema import embeddings_request_queue
-from trustgraph.schema import embeddings_response_queue
-from trustgraph.log_level import LogLevel
-from trustgraph.base import ConsumerProducer
+default_ident = "embeddings"
-module = ".".join(__name__.split(".")[1:-1])
-
-default_input_queue = embeddings_request_queue
-default_output_queue = embeddings_response_queue
-default_subscriber = module
default_model="all-MiniLM-L6-v2"
-class Processor(ConsumerProducer):
+class Processor(EmbeddingsService):
def __init__(self, **params):
- input_queue = params.get("input_queue", default_input_queue)
- output_queue = params.get("output_queue", default_output_queue)
- subscriber = params.get("subscriber", default_subscriber)
model = params.get("model", default_model)
super(Processor, self).__init__(
- **params | {
- "input_queue": input_queue,
- "output_queue": output_queue,
- "subscriber": subscriber,
- "input_schema": EmbeddingsRequest,
- "output_schema": EmbeddingsResponse,
- }
+ **params | { "model": model }
)
+ print("Get model...", flush=True)
self.embeddings = HuggingFaceEmbeddings(model_name=model)
- async def handle(self, msg):
+ async def on_embeddings(self, text):
- v = msg.value()
-
- # Sender-produced ID
- id = msg.properties()["id"]
-
- print(f"Handling input {id}...", flush=True)
-
- try:
-
- text = v.text
- embeds = self.embeddings.embed_documents([text])
-
- print("Send response...", flush=True)
- r = EmbeddingsResponse(vectors=embeds, error=None)
- await self.send(r, properties={"id": id})
-
- print("Done.", flush=True)
-
-
- except Exception as e:
-
- print(f"Exception: {e}")
-
- print("Send error response...", flush=True)
-
- r = EmbeddingsResponse(
- error=Error(
- type = "llm-error",
- message = str(e),
- ),
- response=None,
- )
-
- await self.send(r, properties={"id": id})
-
- self.consumer.acknowledge(msg)
-
+ embeds = self.embeddings.embed_documents([text])
+ print("Done.", flush=True)
+ return embeds
@staticmethod
def add_args(parser):
- ConsumerProducer.add_args(
- parser, default_input_queue, default_subscriber,
- default_output_queue,
- )
+ EmbeddingsService.add_args(parser)
parser.add_argument(
'-m', '--model',
@@ -96,5 +44,5 @@ class Processor(ConsumerProducer):
def run():
- Processor.launch(module, __doc__)
+ Processor.launch(default_ident, __doc__)
diff --git a/trustgraph-flow/scripts/config-svc b/trustgraph-flow/scripts/config-svc
new file mode 100755
index 00000000..9debd391
--- /dev/null
+++ b/trustgraph-flow/scripts/config-svc
@@ -0,0 +1,6 @@
+#!/usr/bin/env python3
+
+from trustgraph.config.service import run
+
+run()
+
diff --git a/trustgraph-flow/scripts/kg-manager b/trustgraph-flow/scripts/kg-manager
new file mode 100644
index 00000000..ee8ec923
--- /dev/null
+++ b/trustgraph-flow/scripts/kg-manager
@@ -0,0 +1,6 @@
+#!/usr/bin/env python3
+
+from trustgraph.cores import run
+
+run()
+
diff --git a/trustgraph-flow/scripts/kg-store b/trustgraph-flow/scripts/kg-store
new file mode 100644
index 00000000..1a5ba9ef
--- /dev/null
+++ b/trustgraph-flow/scripts/kg-store
@@ -0,0 +1,6 @@
+#!/usr/bin/env python3
+
+from trustgraph.storage.knowledge import run
+
+run()
+
diff --git a/trustgraph-flow/setup.py b/trustgraph-flow/setup.py
index 4b6179b6..b4598b64 100644
--- a/trustgraph-flow/setup.py
+++ b/trustgraph-flow/setup.py
@@ -34,7 +34,7 @@ setuptools.setup(
python_requires='>=3.8',
download_url = "https://github.com/trustgraph-ai/trustgraph/archive/refs/tags/v" + version + ".tar.gz",
install_requires=[
- "trustgraph-base>=0.21,<0.22",
+ "trustgraph-base>=0.23,<0.24",
"aiohttp",
"anthropic",
"cassandra-driver",
@@ -73,6 +73,7 @@ setuptools.setup(
"scripts/api-gateway",
"scripts/chunker-recursive",
"scripts/chunker-token",
+ "scripts/config-svc",
"scripts/de-query-milvus",
"scripts/de-query-pinecone",
"scripts/de-query-qdrant",
@@ -94,6 +95,8 @@ setuptools.setup(
"scripts/kg-extract-definitions",
"scripts/kg-extract-relationships",
"scripts/kg-extract-topics",
+ "scripts/kg-store",
+ "scripts/kg-manager",
"scripts/librarian",
"scripts/metering",
"scripts/object-extract-row",
diff --git a/trustgraph-flow/trustgraph/agent/react/agent_manager.py b/trustgraph-flow/trustgraph/agent/react/agent_manager.py
index 5d071e30..d20b86f7 100644
--- a/trustgraph-flow/trustgraph/agent/react/agent_manager.py
+++ b/trustgraph-flow/trustgraph/agent/react/agent_manager.py
@@ -8,12 +8,11 @@ logger = logging.getLogger(__name__)
class AgentManager:
- def __init__(self, context, tools, additional_context=None):
- self.context = context
+ def __init__(self, tools, additional_context=None):
self.tools = tools
self.additional_context = additional_context
- def reason(self, question, history):
+ async def reason(self, question, history, context):
tools = self.tools
@@ -56,10 +55,7 @@ class AgentManager:
logger.info(f"prompt: {variables}")
- obj = self.context.prompt.request(
- "agent-react",
- variables
- )
+ obj = await context("prompt-request").agent_react(variables)
print(json.dumps(obj, indent=4), flush=True)
@@ -85,32 +81,41 @@ class AgentManager:
return a
- def react(self, question, history, think, observe):
+ async def react(self, question, history, think, observe, context):
- act = self.reason(question, history)
+ act = await self.reason(
+ question = question,
+ history = history,
+ context = context,
+ )
logger.info(f"act: {act}")
if isinstance(act, Final):
- think(act.thought)
+ await think(act.thought)
return act
else:
- think(act.thought)
+ await think(act.thought)
if act.name in self.tools:
action = self.tools[act.name]
else:
raise RuntimeError(f"No action for {act.name}!")
- resp = action.implementation.invoke(**act.arguments)
+ print("TOOL>>>", act)
+ resp = await action.implementation(context).invoke(
+ **act.arguments
+ )
+
+ print("RSETUL", resp)
resp = resp.strip()
logger.info(f"resp: {resp}")
- observe(resp)
+ await observe(resp)
act.observation = resp
diff --git a/trustgraph-flow/trustgraph/agent/react/service.py b/trustgraph-flow/trustgraph/agent/react/service.py
index bc045b71..beb17fd4 100755
--- a/trustgraph-flow/trustgraph/agent/react/service.py
+++ b/trustgraph-flow/trustgraph/agent/react/service.py
@@ -6,201 +6,136 @@ import json
import re
import sys
-from pulsar.schema import JsonSchema
+from ... base import AgentService, TextCompletionClientSpec, PromptClientSpec
+from ... base import GraphRagClientSpec
-from ... base import ConsumerProducer
-from ... schema import Error
-from ... schema import AgentRequest, AgentResponse, AgentStep
-from ... schema import agent_request_queue, agent_response_queue
-from ... schema import prompt_request_queue as pr_request_queue
-from ... schema import prompt_response_queue as pr_response_queue
-from ... schema import graph_rag_request_queue as gr_request_queue
-from ... schema import graph_rag_response_queue as gr_response_queue
-from ... clients.prompt_client import PromptClient
-from ... clients.llm_client import LlmClient
-from ... clients.graph_rag_client import GraphRagClient
+from ... schema import AgentRequest, AgentResponse, AgentStep, Error
from . tools import KnowledgeQueryImpl, TextCompletionImpl
from . agent_manager import AgentManager
from . types import Final, Action, Tool, Argument
-module = ".".join(__name__.split(".")[1:-1])
+default_ident = "agent-manager"
+default_max_iterations = 10
-default_input_queue = agent_request_queue
-default_output_queue = agent_response_queue
-default_subscriber = module
-default_max_iterations = 15
-
-class Processor(ConsumerProducer):
+class Processor(AgentService):
def __init__(self, **params):
- additional = params.get("context", None)
+ id = params.get("id")
- self.max_iterations = int(params.get("max_iterations", default_max_iterations))
-
- tools = {}
-
- # Parsing the prompt information to the prompt configuration
- # structure
- tool_type_arg = params.get("tool_type", [])
- if tool_type_arg:
- for t in tool_type_arg:
- toks = t.split("=", 1)
- if len(toks) < 2:
- raise RuntimeError(
- f"Tool-type string not well-formed: {t}"
- )
- ttoks = toks[1].split(":", 1)
- if len(ttoks) < 1:
- raise RuntimeError(
- f"Tool-type string not well-formed: {t}"
- )
-
- if ttoks[0] == "knowledge-query":
- impl = KnowledgeQueryImpl(self)
- elif ttoks[0] == "text-completion":
- impl = TextCompletionImpl(self)
- else:
- raise RuntimeError(
- f"Tool-kind {ttoks[0]} not known"
- )
-
- if len(ttoks) == 1:
-
- tools[toks[0]] = Tool(
- name = toks[0],
- description = "",
- implementation = impl,
- config = { "input": "query" },
- arguments = {},
- )
- else:
- tools[toks[0]] = Tool(
- name = toks[0],
- description = "",
- implementation = impl,
- config = { "input": ttoks[1] },
- arguments = {},
- )
-
- # parsing the prompt information to the prompt configuration
- # structure
- tool_desc_arg = params.get("tool_description", [])
- if tool_desc_arg:
- for t in tool_desc_arg:
- toks = t.split("=", 1)
- if len(toks) < 2:
- raise runtimeerror(
- f"tool-type string not well-formed: {t}"
- )
- if toks[0] not in tools:
- raise runtimeerror(f"description, tool {toks[0]} not known")
- tools[toks[0]].description = toks[1]
-
- # Parsing the prompt information to the prompt configuration
- # structure
- tool_arg_arg = params.get("tool_argument", [])
- if tool_arg_arg:
- for t in tool_arg_arg:
- toks = t.split("=", 1)
- if len(toks) < 2:
- raise RuntimeError(
- f"Tool-type string not well-formed: {t}"
- )
- ttoks = toks[1].split(":", 2)
- if len(ttoks) != 3:
- raise RuntimeError(
- f"Tool argument string not well-formed: {t}"
- )
- if toks[0] not in tools:
- raise RuntimeError(f"Description, tool {toks[0]} not known")
- tools[toks[0]].arguments[ttoks[0]] = Argument(
- name = ttoks[0],
- type = ttoks[1],
- description = ttoks[2]
- )
-
- input_queue = params.get("input_queue", default_input_queue)
- output_queue = params.get("output_queue", default_output_queue)
- subscriber = params.get("subscriber", default_subscriber)
- prompt_request_queue = params.get(
- "prompt_request_queue", pr_request_queue
- )
- prompt_response_queue = params.get(
- "prompt_response_queue", pr_response_queue
- )
- graph_rag_request_queue = params.get(
- "graph_rag_request_queue", gr_request_queue
- )
- graph_rag_response_queue = params.get(
- "graph_rag_response_queue", gr_response_queue
+ self.max_iterations = int(
+ params.get("max_iterations", default_max_iterations)
)
+ self.config_key = params.get("config_type", "agent")
+
super(Processor, self).__init__(
**params | {
- "input_queue": input_queue,
- "output_queue": output_queue,
- "subscriber": subscriber,
- "input_schema": AgentRequest,
- "output_schema": AgentResponse,
- "prompt_request_queue": prompt_request_queue,
- "prompt_response_queue": prompt_response_queue,
- "graph_rag_request_queue": gr_request_queue,
- "graph_rag_response_queue": gr_response_queue,
+ "id": id,
+ "max_iterations": self.max_iterations,
+ "config_type": self.config_key,
}
)
- self.prompt = PromptClient(
- subscriber=subscriber,
- input_queue=prompt_request_queue,
- output_queue=prompt_response_queue,
- pulsar_host = self.pulsar_host,
- pulsar_api_key=self.pulsar_api_key,
- )
-
- self.graph_rag = GraphRagClient(
- subscriber=subscriber,
- input_queue=graph_rag_request_queue,
- output_queue=graph_rag_response_queue,
- pulsar_host = self.pulsar_host,
- pulsar_api_key=self.pulsar_api_key,
- )
-
- # Need to be able to feed requests to myself
- self.recursive_input = self.client.create_producer(
- topic=input_queue,
- schema=JsonSchema(AgentRequest),
- )
-
self.agent = AgentManager(
- context=self,
- tools=tools,
- additional_context=additional
+ tools=[],
+ additional_context="",
)
- def parse_json(self, text):
- json_match = re.search(r'```(?:json)?(.*?)```', text, re.DOTALL)
-
- if json_match:
- json_str = json_match.group(1).strip()
- else:
- # If no delimiters, assume the entire output is JSON
- json_str = text.strip()
+ self.config_handlers.append(self.on_tools_config)
- return json.loads(json_str)
+ self.register_specification(
+ TextCompletionClientSpec(
+ request_name = "text-completion-request",
+ response_name = "text-completion-response",
+ )
+ )
- async def handle(self, msg):
+ self.register_specification(
+ GraphRagClientSpec(
+ request_name = "graph-rag-request",
+ response_name = "graph-rag-response",
+ )
+ )
+
+ self.register_specification(
+ PromptClientSpec(
+ request_name = "prompt-request",
+ response_name = "prompt-response",
+ )
+ )
+
+ async def on_tools_config(self, config, version):
+
+ print("Loading configuration version", version)
+
+ if self.config_key not in config:
+ print(f"No key {self.config_key} in config", flush=True)
+ return
+
+ config = config[self.config_key]
try:
- v = msg.value()
+ # This is some extra stuff to put in the prompt
+ additional = config.get("additional-context", None)
- # Sender-produced ID
- id = msg.properties()["id"]
+ ix = json.loads(config["tool-index"])
- if v.history:
+ tools = {}
+
+ for k in ix:
+
+ pc = config[f"tool.{k}"]
+ data = json.loads(pc)
+
+ arguments = {
+ v.get("name"): Argument(
+ name = v.get("name"),
+ type = v.get("type"),
+ description = v.get("description")
+ )
+ for v in data["arguments"]
+ }
+
+ impl_id = data.get("type")
+
+ if impl_id == "knowledge-query":
+ impl = KnowledgeQueryImpl
+ elif impl_id == "text-completion":
+ impl = TextCompletionImpl
+ else:
+ raise RuntimeError(
+ f"Tool-kind {impl_id} not known"
+ )
+
+ tools[data.get("name")] = Tool(
+ name = data.get("name"),
+ description = data.get("description"),
+ implementation = impl,
+ config=data.get("config", {}),
+ arguments = arguments,
+ )
+
+ self.agent = AgentManager(
+ tools=tools,
+ additional_context=additional
+ )
+
+ print("Prompt configuration reloaded.", flush=True)
+
+ except Exception as e:
+
+ print("on_tools_config Exception:", e, flush=True)
+ print("Configuration reload failed", flush=True)
+
+ async def agent_request(self, request, respond, next, flow):
+
+ try:
+
+ if request.history:
history = [
Action(
thought=h.thought,
@@ -208,12 +143,12 @@ class Processor(ConsumerProducer):
arguments=h.arguments,
observation=h.observation
)
- for h in v.history
+ for h in request.history
]
else:
history = []
- print(f"Question: {v.question}", flush=True)
+ print(f"Question: {request.question}", flush=True)
if len(history) >= self.max_iterations:
raise RuntimeError("Too many agent iterations")
@@ -231,7 +166,7 @@ class Processor(ConsumerProducer):
observation=None,
)
- await self.send(r, properties={"id": id})
+ await respond(r)
async def observe(x):
@@ -244,15 +179,21 @@ class Processor(ConsumerProducer):
observation=x,
)
- await self.send(r, properties={"id": id})
+ await respond(r)
- act = self.agent.react(v.question, history, think, observe)
+ act = await self.agent.react(
+ question = request.question,
+ history = history,
+ think = think,
+ observe = observe,
+ context = flow,
+ )
print(f"Action: {act}", flush=True)
- print("Send response...", flush=True)
+ if isinstance(act, Final):
- if type(act) == Final:
+ print("Send final response...", flush=True)
r = AgentResponse(
answer=act.final,
@@ -260,18 +201,20 @@ class Processor(ConsumerProducer):
thought=None,
)
- await self.send(r, properties={"id": id})
+ await respond(r)
print("Done.", flush=True)
return
+ print("Send next...", flush=True)
+
history.append(act)
r = AgentRequest(
- question=v.question,
- plan=v.plan,
- state=v.state,
+ question=request.question,
+ plan=request.plan,
+ state=request.state,
history=[
AgentStep(
thought=h.thought,
@@ -283,7 +226,7 @@ class Processor(ConsumerProducer):
]
)
- self.recursive_input.send(r, properties={"id": id})
+ await next(r)
print("Done.", flush=True)
@@ -291,7 +234,7 @@ class Processor(ConsumerProducer):
except Exception as e:
- print(f"Exception: {e}")
+ print(f"agent_request Exception: {e}")
print("Send error response...", flush=True)
@@ -303,73 +246,12 @@ class Processor(ConsumerProducer):
response=None,
)
- await self.send(r, properties={"id": id})
+ await respond(r)
@staticmethod
def add_args(parser):
- ConsumerProducer.add_args(
- parser, default_input_queue, default_subscriber,
- default_output_queue,
- )
-
- parser.add_argument(
- '--prompt-request-queue',
- default=pr_request_queue,
- help=f'Prompt request queue (default: {pr_request_queue})',
- )
-
- parser.add_argument(
- '--prompt-response-queue',
- default=pr_response_queue,
- help=f'Prompt response queue (default: {pr_response_queue})',
- )
-
- parser.add_argument(
- '--graph-rag-request-queue',
- default=gr_request_queue,
- help=f'Graph RAG request queue (default: {gr_request_queue})',
- )
-
- parser.add_argument(
- '--graph-rag-response-queue',
- default=gr_response_queue,
- help=f'Graph RAG response queue (default: {gr_response_queue})',
- )
-
- parser.add_argument(
- '--tool-type', nargs='*',
- help=f'''Specifies the type of an agent tool. Takes the form
-=. is the name of the tool. is one of
-knowledge-query, text-completion. Additional parameters are specified
-for different tools which are tool-specific. e.g. knowledge-query:
-which specifies the name of the arg whose content is fed into the knowledge
-query as a question. text-completion: specifies the name of the arg
-whose content is fed into the text-completion service as a prompt'''
- )
-
- parser.add_argument(
- '--tool-description', nargs='*',
- help=f'''Specifies the textual description of a tool. Takes
-the form =. The description is important, it teaches the
-LLM how to use the tool. It should describe what it does and how to
-use the arguments. This is specified in natural language.'''
- )
-
- parser.add_argument(
- '--tool-argument', nargs='*',
- help=f'''Specifies argument usage for a tool. Takes
-the form =::. The description is important,
-it is read by the LLM and used to determine how to use the argument.
- can be specified multiple times to give a tool multiple arguments.
- is one of string, number. is a natural language
-description.'''
- )
-
- parser.add_argument(
- '--context',
- help=f'Optional, specifies additional context text for the LLM.'
- )
+ AgentService.add_args(parser)
parser.add_argument(
'--max-iterations',
@@ -377,7 +259,13 @@ description.'''
help=f'Maximum number of react iterations (default: {default_max_iterations})',
)
+ parser.add_argument(
+ '--config-type',
+ default="agent",
+ help=f'Configuration key for prompts (default: agent)',
+ )
+
def run():
- Processor.launch(module, __doc__)
+ Processor.launch(default_ident, __doc__)
diff --git a/trustgraph-flow/trustgraph/agent/react/tools.py b/trustgraph-flow/trustgraph/agent/react/tools.py
index 941610be..31568b25 100644
--- a/trustgraph-flow/trustgraph/agent/react/tools.py
+++ b/trustgraph-flow/trustgraph/agent/react/tools.py
@@ -4,16 +4,22 @@
class KnowledgeQueryImpl:
def __init__(self, context):
self.context = context
- def invoke(self, **arguments):
- return self.context.graph_rag.request(arguments.get("query"))
+ async def invoke(self, **arguments):
+ client = self.context("graph-rag-request")
+ print("Graph RAG question...", flush=True)
+ return await client.rag(
+ arguments.get("question")
+ )
# This tool implementation knows how to do text completion. This uses
# the prompt service, rather than talking to TextCompletion directly.
class TextCompletionImpl:
def __init__(self, context):
self.context = context
- def invoke(self, **arguments):
- return self.context.prompt.request(
- "question", { "question": arguments.get("question") }
+ async def invoke(self, **arguments):
+ client = self.context("prompt-request")
+ print("Prompt question...", flush=True)
+ return await client.question(
+ arguments.get("question")
)
diff --git a/trustgraph-flow/trustgraph/chunking/recursive/chunker.py b/trustgraph-flow/trustgraph/chunking/recursive/chunker.py
index 82f333b5..aa48cc57 100755
--- a/trustgraph-flow/trustgraph/chunking/recursive/chunker.py
+++ b/trustgraph-flow/trustgraph/chunking/recursive/chunker.py
@@ -7,40 +7,27 @@ as text as separate output objects.
from langchain_text_splitters import RecursiveCharacterTextSplitter
from prometheus_client import Histogram
-from ... schema import TextDocument, Chunk, Metadata
-from ... schema import text_ingest_queue, chunk_ingest_queue
-from ... log_level import LogLevel
-from ... base import ConsumerProducer
+from ... schema import TextDocument, Chunk
+from ... base import FlowProcessor, ConsumerSpec, ProducerSpec
-module = ".".join(__name__.split(".")[1:-1])
+default_ident = "chunker"
-default_input_queue = text_ingest_queue
-default_output_queue = chunk_ingest_queue
-default_subscriber = module
-
-class Processor(ConsumerProducer):
+class Processor(FlowProcessor):
def __init__(self, **params):
- input_queue = params.get("input_queue", default_input_queue)
- output_queue = params.get("output_queue", default_output_queue)
- subscriber = params.get("subscriber", default_subscriber)
+ id = params.get("id", default_ident)
chunk_size = params.get("chunk_size", 2000)
chunk_overlap = params.get("chunk_overlap", 100)
super(Processor, self).__init__(
- **params | {
- "input_queue": input_queue,
- "output_queue": output_queue,
- "subscriber": subscriber,
- "input_schema": TextDocument,
- "output_schema": Chunk,
- }
+ **params | { "id": id }
)
if not hasattr(__class__, "chunk_metric"):
__class__.chunk_metric = Histogram(
'chunk_size', 'Chunk size',
+ ["id", "flow"],
buckets=[100, 160, 250, 400, 650, 1000, 1600,
2500, 4000, 6400, 10000, 16000]
)
@@ -52,7 +39,24 @@ class Processor(ConsumerProducer):
is_separator_regex=False,
)
- async def handle(self, msg):
+ self.register_specification(
+ ConsumerSpec(
+ name = "input",
+ schema = TextDocument,
+ handler = self.on_message,
+ )
+ )
+
+ self.register_specification(
+ ProducerSpec(
+ name = "output",
+ schema = Chunk,
+ )
+ )
+
+ print("Chunker initialised", flush=True)
+
+ async def on_message(self, msg, consumer, flow):
v = msg.value()
print(f"Chunking {v.metadata.id}...", flush=True)
@@ -63,24 +67,25 @@ class Processor(ConsumerProducer):
for ix, chunk in enumerate(texts):
+ print("Chunk", len(chunk.page_content), flush=True)
+
r = Chunk(
metadata=v.metadata,
chunk=chunk.page_content.encode("utf-8"),
)
- __class__.chunk_metric.observe(len(chunk.page_content))
+ __class__.chunk_metric.labels(
+ id=consumer.id, flow=consumer.flow
+ ).observe(len(chunk.page_content))
- await self.send(r)
+ await flow("output").send(r)
print("Done.", flush=True)
@staticmethod
def add_args(parser):
- ConsumerProducer.add_args(
- parser, default_input_queue, default_subscriber,
- default_output_queue,
- )
+ FlowProcessor.add_args(parser)
parser.add_argument(
'-z', '--chunk-size',
@@ -98,5 +103,5 @@ class Processor(ConsumerProducer):
def run():
- Processor.launch(module, __doc__)
+ Processor.launch(default_ident, __doc__)
diff --git a/trustgraph-flow/trustgraph/chunking/token/chunker.py b/trustgraph-flow/trustgraph/chunking/token/chunker.py
index c625b48c..ff217350 100755
--- a/trustgraph-flow/trustgraph/chunking/token/chunker.py
+++ b/trustgraph-flow/trustgraph/chunking/token/chunker.py
@@ -7,40 +7,27 @@ as text as separate output objects.
from langchain_text_splitters import TokenTextSplitter
from prometheus_client import Histogram
-from ... schema import TextDocument, Chunk, Metadata
-from ... schema import text_ingest_queue, chunk_ingest_queue
-from ... log_level import LogLevel
-from ... base import ConsumerProducer
+from ... schema import TextDocument, Chunk
+from ... base import FlowProcessor
-module = ".".join(__name__.split(".")[1:-1])
+default_ident = "chunker"
-default_input_queue = text_ingest_queue
-default_output_queue = chunk_ingest_queue
-default_subscriber = module
-
-class Processor(ConsumerProducer):
+class Processor(FlowProcessor):
def __init__(self, **params):
- input_queue = params.get("input_queue", default_input_queue)
- output_queue = params.get("output_queue", default_output_queue)
- subscriber = params.get("subscriber", default_subscriber)
+ id = params.get("id")
chunk_size = params.get("chunk_size", 250)
chunk_overlap = params.get("chunk_overlap", 15)
super(Processor, self).__init__(
- **params | {
- "input_queue": input_queue,
- "output_queue": output_queue,
- "subscriber": subscriber,
- "input_schema": TextDocument,
- "output_schema": Chunk,
- }
+ **params | { "id": id }
)
if not hasattr(__class__, "chunk_metric"):
__class__.chunk_metric = Histogram(
'chunk_size', 'Chunk size',
+ ["id", "flow"],
buckets=[100, 160, 250, 400, 650, 1000, 1600,
2500, 4000, 6400, 10000, 16000]
)
@@ -51,7 +38,24 @@ class Processor(ConsumerProducer):
chunk_overlap=chunk_overlap,
)
- async def handle(self, msg):
+ self.register_specification(
+ ConsumerSpec(
+ name = "input",
+ schema = TextDocument,
+ handler = self.on_message,
+ )
+ )
+
+ self.register_specification(
+ ProducerSpec(
+ name = "output",
+ schema = Chunk,
+ )
+ )
+
+ print("Chunker initialised", flush=True)
+
+ async def on_message(self, msg, consumer, flow):
v = msg.value()
print(f"Chunking {v.metadata.id}...", flush=True)
@@ -62,24 +66,25 @@ class Processor(ConsumerProducer):
for ix, chunk in enumerate(texts):
+ print("Chunk", len(chunk.page_content), flush=True)
+
r = Chunk(
metadata=v.metadata,
chunk=chunk.page_content.encode("utf-8"),
)
- __class__.chunk_metric.observe(len(chunk.page_content))
+ __class__.chunk_metric.labels(
+ id=consumer.id, flow=consumer.flow
+ ).observe(len(chunk.page_content))
- await self.send(r)
+ await flow("output").send(r)
print("Done.", flush=True)
@staticmethod
def add_args(parser):
- ConsumerProducer.add_args(
- parser, default_input_queue, default_subscriber,
- default_output_queue,
- )
+ FlowProcessor.add_args(parser)
parser.add_argument(
'-z', '--chunk-size',
@@ -97,5 +102,5 @@ class Processor(ConsumerProducer):
def run():
- Processor.launch(module, __doc__)
+ Processor.launch(default_ident, __doc__)
diff --git a/trustgraph-flow/trustgraph/config/service/__init__.py b/trustgraph-flow/trustgraph/config/service/__init__.py
new file mode 100644
index 00000000..ba844705
--- /dev/null
+++ b/trustgraph-flow/trustgraph/config/service/__init__.py
@@ -0,0 +1,3 @@
+
+from . service import *
+
diff --git a/trustgraph-flow/trustgraph/config/service/__main__.py b/trustgraph-flow/trustgraph/config/service/__main__.py
new file mode 100644
index 00000000..e9136855
--- /dev/null
+++ b/trustgraph-flow/trustgraph/config/service/__main__.py
@@ -0,0 +1,7 @@
+#!/usr/bin/env python3
+
+from . service import run
+
+if __name__ == '__main__':
+ run()
+
diff --git a/trustgraph-flow/trustgraph/config/service/config.py b/trustgraph-flow/trustgraph/config/service/config.py
new file mode 100644
index 00000000..de684ec2
--- /dev/null
+++ b/trustgraph-flow/trustgraph/config/service/config.py
@@ -0,0 +1,269 @@
+
+from trustgraph.schema import ConfigResponse
+from trustgraph.schema import ConfigValue, Error
+
+from ... tables.config import ConfigTableStore
+
+class ConfigurationClass:
+
+ async def keys(self):
+ return await self.table_store.get_keys(self.type)
+
+ async def values(self):
+ vals = await self.table_store.get_values(self.type)
+ return {
+ v[0]: v[1]
+ for v in vals
+ }
+
+ async def get(self, key):
+ return await self.table_store.get_value(self.type, key)
+
+ async def put(self, key, value):
+ return await self.table_store.put_config(self.type, key, value)
+
+ async def delete(self, key):
+ return await self.table_store.delete_key(self.type, key)
+
+ async def has(self, key):
+ val = await self.table_store.get_value(self.type, key)
+ return val is not None
+
+class Configuration:
+
+ # FIXME: The state is held internally. This only works if there's
+ # one config service. Should be more than one, and use a
+ # back-end state store.
+
+ # FIXME: This has state now, but does it address all of the above?
+ # REVIEW: Above
+
+ # FIXME: Some version vs config race conditions
+
+ def __init__(self, push, host, user, password, keyspace):
+
+ # External function to respond to update
+ self.push = push
+
+ self.table_store = ConfigTableStore(
+ host, user, password, keyspace
+ )
+
+ async def inc_version(self):
+ await self.table_store.inc_version()
+
+ async def get_version(self):
+ return await self.table_store.get_version()
+
+ def get(self, type):
+
+ c = ConfigurationClass()
+ c.table_store = self.table_store
+ c.type = type
+
+ return c
+
+ async def handle_get(self, v):
+
+ # for k in v.keys:
+ # if k.type not in self or k.key not in self[k.type]:
+ # return ConfigResponse(
+ # version = None,
+ # values = None,
+ # directory = None,
+ # config = None,
+ # error = Error(
+ # type = "key-error",
+ # message = f"Key error"
+ # )
+ # )
+
+ values = [
+ ConfigValue(
+ type = k.type,
+ key = k.key,
+ value = await self.table_store.get_value(k.type, k.key)
+ )
+ for k in v.keys
+ ]
+
+ return ConfigResponse(
+ version = await self.get_version(),
+ values = values,
+ directory = None,
+ config = None,
+ error = None,
+ )
+
+ async def handle_list(self, v):
+
+ # if v.type not in self:
+
+ # return ConfigResponse(
+ # version = None,
+ # values = None,
+ # directory = None,
+ # config = None,
+ # error = Error(
+ # type = "key-error",
+ # message = "No such type",
+ # ),
+ # )
+
+ return ConfigResponse(
+ version = await self.get_version(),
+ values = None,
+ directory = await self.table_store.get_keys(v.type),
+ config = None,
+ error = None,
+ )
+
+ async def handle_getvalues(self, v):
+
+ # if v.type not in self:
+
+ # return ConfigResponse(
+ # version = None,
+ # values = None,
+ # directory = None,
+ # config = None,
+ # error = Error(
+ # type = "key-error",
+ # message = f"Key error"
+ # )
+ # )
+
+ vals = await self.table_store.get_values(v.type)
+
+ values = map(
+ lambda x: ConfigValue(
+ type = v.type, key = x[0], value = x[1]
+ ),
+ vals
+ )
+
+ return ConfigResponse(
+ version = await self.get_version(),
+ values = list(values),
+ directory = None,
+ config = None,
+ error = None,
+ )
+
+ async def handle_delete(self, v):
+
+ # for k in v.keys:
+ # if k.type not in self or k.key not in self[k.type]:
+ # return ConfigResponse(
+ # version = None,
+ # values = None,
+ # directory = None,
+ # config = None,
+ # error = Error(
+ # type = "key-error",
+ # message = f"Key error"
+ # )
+ # )
+
+ for k in v.keys:
+
+ await self.table_store.delete_key(k.type, k.key)
+
+ await self.inc_version()
+
+ await self.push()
+
+ return ConfigResponse(
+ version = None,
+ value = None,
+ directory = None,
+ values = None,
+ config = None,
+ error = None,
+ )
+
+ async def handle_put(self, v):
+
+ for k in v.values:
+
+ await self.table_store.put_config(k.type, k.key, k.value)
+
+ await self.inc_version()
+
+ await self.push()
+
+ return ConfigResponse(
+ version = None,
+ value = None,
+ directory = None,
+ values = None,
+ error = None,
+ )
+
+ async def get_config(self):
+
+ table = await self.table_store.get_all()
+
+ config = {}
+
+ for row in table:
+ if row[0] not in config:
+ config[row[0]] = {}
+ config[row[0]][row[1]] = row[2]
+
+ return config
+
+ async def handle_config(self, v):
+
+ config = await self.get_config()
+
+ return ConfigResponse(
+ version = await self.get_version(),
+ value = None,
+ directory = None,
+ values = None,
+ config = config,
+ error = None,
+ )
+
+ async def handle(self, msg):
+
+ print("Handle message ", msg.operation)
+
+ if msg.operation == "get":
+
+ resp = await self.handle_get(msg)
+
+ elif msg.operation == "list":
+
+ resp = await self.handle_list(msg)
+
+ elif msg.operation == "getvalues":
+
+ resp = await self.handle_getvalues(msg)
+
+ elif msg.operation == "delete":
+
+ resp = await self.handle_delete(msg)
+
+ elif msg.operation == "put":
+
+ resp = await self.handle_put(msg)
+
+ elif msg.operation == "config":
+
+ resp = await self.handle_config(msg)
+
+ else:
+
+ resp = ConfigResponse(
+ value=None,
+ directory=None,
+ values=None,
+ error=Error(
+ type = "bad-operation",
+ message = "Bad operation"
+ )
+ )
+
+ return resp
diff --git a/trustgraph-flow/trustgraph/config/service/flow.py b/trustgraph-flow/trustgraph/config/service/flow.py
new file mode 100644
index 00000000..83e6835e
--- /dev/null
+++ b/trustgraph-flow/trustgraph/config/service/flow.py
@@ -0,0 +1,252 @@
+
+from trustgraph.schema import FlowResponse, Error
+import json
+
+class FlowConfig:
+ def __init__(self, config):
+ """Keep the configuration object that the handlers of this class read and write."""
+
+ self.config = config
+
+ async def handle_list_classes(self, msg):
+     """Reply with the keys stored under "flow-classes"."""
+
+     flow_classes = self.config.get("flow-classes")
+     class_keys = await flow_classes.keys()
+
+     return FlowResponse(
+         error = None,
+         class_names = list(class_keys),
+     )
+
+ async def handle_get_class(self, msg):
+     """Reply with the definition stored for ``msg.class_name``."""
+
+     flow_classes = self.config.get("flow-classes")
+     definition = await flow_classes.get(msg.class_name)
+
+     return FlowResponse(
+         error = None,
+         class_definition = definition,
+     )
+
+ async def handle_put_class(self, msg):
+     """Store ``msg.class_definition`` under ``msg.class_name``, then bump the version and push."""
+
+     flow_classes = self.config.get("flow-classes")
+     await flow_classes.put(msg.class_name, msg.class_definition)
+
+     await self.config.inc_version()
+     await self.config.push()
+
+     return FlowResponse(error = None)
+
+ async def handle_delete_class(self, msg):
+     """Delete the flow class ``msg.class_name``, then bump the version and push."""
+
+     print(msg)
+
+     flow_classes = self.config.get("flow-classes")
+     await flow_classes.delete(msg.class_name)
+
+     await self.config.inc_version()
+     await self.config.push()
+
+     return FlowResponse(error = None)
+
+ async def handle_list_flows(self, msg):
+     """Reply with the keys stored under "flows"."""
+
+     flows = self.config.get("flows")
+     flow_keys = await flows.keys()
+
+     return FlowResponse(
+         error = None,
+         flow_ids = list(flow_keys),
+     )
+
+ async def handle_get_flow(self, msg):
+     """Reply with the value stored under "flows" for ``msg.flow_id``."""
+
+     flows = self.config.get("flows")
+     stored = await flows.get(msg.flow_id)
+
+     return FlowResponse(error = None, flow = stored)
+
+ async def handle_start_flow(self, msg):
+ """Create flow ``msg.flow_id`` from flow class ``msg.class_name``.
+
+ Validates the request, expands the "{class}" and "{id}" placeholders of
+ the class definition, merges the resulting variants into "flows-active",
+ records the flow under "flows", then bumps the config version and pushes.
+
+ Raises RuntimeError when the class name, flow ID or description is
+ None, when the flow ID is already present, or when the class is absent.
+ """
+
+ if msg.class_name is None:
+ raise RuntimeError("No class name")
+
+ if msg.flow_id is None:
+ raise RuntimeError("No flow ID")
+
+ # NOTE(review): membership is tested against values(); further down the
+ # same call's result is indexed by key, so it is presumably a
+ # key -> value mapping -- confirm.
+ if msg.flow_id in await self.config.get("flows").values():
+ raise RuntimeError("Flow already exists")
+
+ if msg.description is None:
+ raise RuntimeError("No description")
+
+ if msg.class_name not in await self.config.get("flow-classes").values():
+ raise RuntimeError("Class does not exist")
+
+ # Substitute the "{class}" and "{id}" placeholders in one string.
+ def repl_template(tmp):
+ return tmp.replace(
+ "{class}", msg.class_name
+ ).replace(
+ "{id}", msg.flow_id
+ )
+
+ # The stored class definition is JSON text.
+ cls = json.loads(
+ await self.config.get("flow-classes").get(msg.class_name)
+ )
+
+ # Both the "class" and the "flow" sections of the definition are processed.
+ for kind in ("class", "flow"):
+
+ for k, v in cls[kind].items():
+
+ # Section keys are split once on ":" into processor and variant.
+ processor, variant = k.split(":", 1)
+
+ variant = repl_template(variant)
+
+ # Placeholders are expanded in both keys and values of the settings.
+ v = {
+ repl_template(k2): repl_template(v2)
+ for k2, v2 in v.items()
+ }
+
+ # Read on every pass, so entries written by earlier passes are seen.
+ flac = await self.config.get("flows-active").values()
+ if processor in flac:
+ target = json.loads(flac[processor])
+ else:
+ target = {}
+
+ # A variant that is already present is left as it is.
+ if variant not in target:
+ target[variant] = v
+
+ await self.config.get("flows-active").put(
+ processor, json.dumps(target)
+ )
+
+ # An interface is either a single template string or a dict of them.
+ def repl_interface(i):
+ if isinstance(i, str):
+ return repl_template(i)
+ else:
+ return {
+ k: repl_template(v)
+ for k, v in i.items()
+ }
+
+ if "interfaces" in cls:
+ interfaces = {
+ k: repl_interface(v)
+ for k, v in cls["interfaces"].items()
+ }
+ else:
+ interfaces = {}
+
+ # Record the flow itself; handle_stop_flow reads "class-name" back.
+ await self.config.get("flows").put(
+ msg.flow_id,
+ json.dumps({
+ "description": msg.description,
+ "class-name": msg.class_name,
+ "interfaces": interfaces,
+ })
+ )
+
+ await self.config.inc_version()
+
+ await self.config.push()
+
+ return FlowResponse(
+ error = None,
+ )
+
+ async def handle_stop_flow(self, msg):
+ """Remove flow ``msg.flow_id`` and its "flow"-section variants.
+
+ Looks up the flow's class, deletes the matching variants from
+ "flows-active", deletes the flow record, then bumps the config version
+ and pushes.
+
+ Raises RuntimeError when the flow ID is None or unknown, or when the
+ stored flow record has no "class-name".
+ """
+
+ if msg.flow_id is None:
+ raise RuntimeError("No flow ID")
+
+ if msg.flow_id not in await self.config.get("flows").keys():
+ raise RuntimeError("Flow ID invalid")
+
+ # The flow record is JSON text written by handle_start_flow.
+ flow = json.loads(await self.config.get("flows").get(msg.flow_id))
+
+ if "class-name" not in flow:
+ raise RuntimeError("Internal error: flow has no flow class")
+
+ class_name = flow["class-name"]
+
+ cls = json.loads(await self.config.get("flow-classes").get(class_name))
+
+ # Substitute the "{class}" and "{id}" placeholders in one string.
+ def repl_template(tmp):
+ return tmp.replace(
+ "{class}", class_name
+ ).replace(
+ "{id}", msg.flow_id
+ )
+
+ # Only the "flow" section is visited; variants created from the "class"
+ # section by handle_start_flow are not removed here.
+ for kind in ("flow",):
+
+ for k, v in cls[kind].items():
+
+ # Section keys are split once on ":" into processor and variant.
+ processor, variant = k.split(":", 1)
+
+ variant = repl_template(variant)
+
+ flac = await self.config.get("flows-active").values()
+
+ if processor in flac:
+ target = json.loads(flac[processor])
+ else:
+ target = {}
+
+ if variant in target:
+ del target[variant]
+
+ # The processor entry is written back even when nothing was removed.
+ await self.config.get("flows-active").put(
+ processor, json.dumps(target)
+ )
+
+ # NOTE(review): this check uses values() while the one at the top uses
+ # keys(); presumably both are keyed by flow ID -- confirm.
+ if msg.flow_id in await self.config.get("flows").values():
+ await self.config.get("flows").delete(msg.flow_id)
+
+ await self.config.inc_version()
+
+ await self.config.push()
+
+ return FlowResponse(
+ error = None,
+ )
+
+ async def handle(self, msg):
+     """Route a flow request to the handler for ``msg.operation``.
+
+     Returns the handler's FlowResponse, or a FlowResponse carrying a
+     "bad-operation" error when the operation is not recognised.
+     Exceptions raised by a handler propagate to the caller.
+     """
+
+     print("Handle message ", msg.operation)
+
+     # Operation name -> name of the coroutine method serving it.
+     handler_names = {
+         "list-classes": "handle_list_classes",
+         "get-class": "handle_get_class",
+         "put-class": "handle_put_class",
+         "delete-class": "handle_delete_class",
+         "list-flows": "handle_list_flows",
+         "get-flow": "handle_get_flow",
+         "start-flow": "handle_start_flow",
+         "stop-flow": "handle_stop_flow",
+     }
+
+     if msg.operation not in handler_names:
+
+         # Only the error is set: the value/directory/values keywords
+         # used here before belong to ConfigResponse, and no handler in
+         # this class sets them on a FlowResponse.
+         return FlowResponse(
+             error=Error(
+                 type = "bad-operation",
+                 message = "Bad operation"
+             )
+         )
+
+     handler = getattr(self, handler_names[msg.operation])
+
+     return await handler(msg)
+
+
diff --git a/trustgraph-flow/trustgraph/config/service/service.py b/trustgraph-flow/trustgraph/config/service/service.py
new file mode 100644
index 00000000..1ef81341
--- /dev/null
+++ b/trustgraph-flow/trustgraph/config/service/service.py
@@ -0,0 +1,295 @@
+
+"""
+Config service. Manages system global configuration state
+"""
+
+from trustgraph.schema import Error
+
+from trustgraph.schema import ConfigRequest, ConfigResponse, ConfigPush
+from trustgraph.schema import config_request_queue, config_response_queue
+from trustgraph.schema import config_push_queue
+
+from trustgraph.schema import FlowRequest, FlowResponse
+from trustgraph.schema import flow_request_queue, flow_response_queue
+
+from trustgraph.base import AsyncProcessor, Consumer, Producer
+
+from . config import Configuration
+from . flow import FlowConfig
+
+from ... base import ProcessorMetrics, ConsumerMetrics, ProducerMetrics
+from ... base import Consumer, Producer
+
+# FIXME: How to ensure this doesn't conflict with other usage?
+# Keyspace name handed to Configuration by Processor.__init__.
+keyspace = "config"
+
+# Identity handed to Processor.launch() by run().
+default_ident = "config-svc"
+
+# Queue defaults, used when the matching parameter is not supplied.
+default_config_request_queue = config_request_queue
+default_config_response_queue = config_response_queue
+default_config_push_queue = config_push_queue
+
+default_flow_request_queue = flow_request_queue
+default_flow_response_queue = flow_response_queue
+
+# Default Cassandra host; Processor.__init__ splits the value on ",".
+default_cassandra_host = "cassandra"
+
+class Processor(AsyncProcessor):
+
+ def __init__(self, **params):
+     """Wire up the config service.
+
+     Reads queue names and Cassandra settings from ``params`` (falling
+     back to the module defaults), initialises the base processor, then
+     creates the config and flow request consumers, the response and
+     push producers, and the Configuration and FlowConfig handlers.
+
+     Keys read from ``params``: config_request_queue,
+     config_response_queue, config_push_queue, flow_request_queue,
+     flow_response_queue, cassandra_host (split on ","),
+     cassandra_user, cassandra_password and id.
+     """
+
+     config_request_queue = params.get(
+         "config_request_queue", default_config_request_queue
+     )
+     config_response_queue = params.get(
+         "config_response_queue", default_config_response_queue
+     )
+     config_push_queue = params.get(
+         "config_push_queue", default_config_push_queue
+     )
+
+     flow_request_queue = params.get(
+         "flow_request_queue", default_flow_request_queue
+     )
+     flow_response_queue = params.get(
+         "flow_response_queue", default_flow_response_queue
+     )
+
+     cassandra_host = params.get("cassandra_host", default_cassandra_host)
+     cassandra_user = params.get("cassandra_user")
+     cassandra_password = params.get("cassandra_password")
+
+     id = params.get("id")
+
+     # The password is deliberately left out of what goes to the base class.
+     super(Processor, self).__init__(
+         **params | {
+             "config_request_schema": ConfigRequest.__name__,
+             "config_response_schema": ConfigResponse.__name__,
+             "config_push_schema": ConfigPush.__name__,
+             "flow_request_schema": FlowRequest.__name__,
+             "flow_response_schema": FlowResponse.__name__,
+             "cassandra_host": cassandra_host,
+             "cassandra_user": cassandra_user,
+         }
+     )
+
+     config_request_metrics = ConsumerMetrics(
+         processor = self.id, flow = None, name = "config-request"
+     )
+     config_response_metrics = ProducerMetrics(
+         processor = self.id, flow = None, name = "config-response"
+     )
+     config_push_metrics = ProducerMetrics(
+         processor = self.id, flow = None, name = "config-push"
+     )
+
+     flow_request_metrics = ConsumerMetrics(
+         processor = self.id, flow = None, name = "flow-request"
+     )
+     flow_response_metrics = ProducerMetrics(
+         processor = self.id, flow = None, name = "flow-response"
+     )
+
+     self.config_request_consumer = Consumer(
+         taskgroup = self.taskgroup,
+         client = self.pulsar_client,
+         flow = None,
+         topic = config_request_queue,
+         subscriber = id,
+         schema = ConfigRequest,
+         handler = self.on_config_request,
+         metrics = config_request_metrics,
+     )
+
+     self.config_response_producer = Producer(
+         client = self.pulsar_client,
+         topic = config_response_queue,
+         schema = ConfigResponse,
+         metrics = config_response_metrics,
+     )
+
+     self.config_push_producer = Producer(
+         client = self.pulsar_client,
+         topic = config_push_queue,
+         schema = ConfigPush,
+         metrics = config_push_metrics,
+     )
+
+     self.flow_request_consumer = Consumer(
+         taskgroup = self.taskgroup,
+         client = self.pulsar_client,
+         flow = None,
+         topic = flow_request_queue,
+         subscriber = id,
+         schema = FlowRequest,
+         handler = self.on_flow_request,
+         metrics = flow_request_metrics,
+     )
+
+     self.flow_response_producer = Producer(
+         client = self.pulsar_client,
+         topic = flow_response_queue,
+         schema = FlowResponse,
+         metrics = flow_response_metrics,
+     )
+
+     # Configuration calls back into self.push after it changes state.
+     self.config = Configuration(
+         host = cassandra_host.split(","),
+         user = cassandra_user,
+         password = cassandra_password,
+         keyspace = keyspace,
+         push = self.push
+     )
+
+     self.flow = FlowConfig(self.config)
+
+     print("Service initialised.")
+
+ async def start(self):
+ """Push the current configuration once, then start both request consumers."""
+
+ await self.push()
+ await self.config_request_consumer.start()
+ await self.flow_request_consumer.start()
+
+ async def push(self):
+     """Publish the full configuration and its version on the push queue."""
+
+     config = await self.config.get_config()
+     version = await self.config.get_version()
+
+     resp = ConfigPush(
+         version = version,
+         value = None,
+         directory = None,
+         values = None,
+         config = config,
+         error = None,
+     )
+
+     await self.config_push_producer.send(resp)
+
+     # Race condition, should make sure version & config sync
+
+     # Log the version that was actually sent.  Re-reading it here could
+     # report a newer number than the message carried, and costs an
+     # extra store round-trip.
+     print("Pushed version ", version)
+
+ async def on_config_request(self, msg, consumer, flow):
+     """Serve one config request and send the reply on the response queue.
+
+     The reply is tagged with the sender-produced "id" message property.
+     Any exception raised while decoding or handling the request is
+     reported back to the sender as a "config-error" response.
+     """
+
+     # Sender-produced ID.  Read before the try block so that the error
+     # path below always has an ID to address its reply with; it used to
+     # be unbound there whenever msg.value() failed.
+     request_id = msg.properties()["id"]
+
+     try:
+
+         v = msg.value()
+
+         print(f"Handling {request_id}...", flush=True)
+
+         resp = await self.config.handle(v)
+
+         await self.config_response_producer.send(
+             resp, properties={"id": request_id}
+         )
+
+     except Exception as e:
+
+         resp = ConfigResponse(
+             error=Error(
+                 type = "config-error",
+                 message = str(e),
+             ),
+         )
+
+         await self.config_response_producer.send(
+             resp, properties={"id": request_id}
+         )
+
+ async def on_flow_request(self, msg, consumer, flow):
+     """Serve one flow request and send the reply on the flow response queue.
+
+     The reply is tagged with the sender-produced "id" message property.
+     Any exception raised while decoding or handling the request is
+     reported back to the sender as a "flow-error" response.
+     """
+
+     # Sender-produced ID.  Read before the try block so that the error
+     # path below always has an ID to address its reply with; it used to
+     # be unbound there whenever msg.value() failed.
+     request_id = msg.properties()["id"]
+
+     try:
+
+         v = msg.value()
+
+         print(f"Handling {request_id}...", flush=True)
+
+         resp = await self.flow.handle(v)
+
+         await self.flow_response_producer.send(
+             resp, properties={"id": request_id}
+         )
+
+     except Exception as e:
+
+         resp = FlowResponse(
+             error=Error(
+                 type = "flow-error",
+                 message = str(e),
+             ),
+         )
+
+         await self.flow_response_producer.send(
+             resp, properties={"id": request_id}
+         )
+
+ @staticmethod
+ def add_args(parser):
+     """Register the config service's command-line options on ``parser``.
+
+     Adds the AsyncProcessor options first, then the config and flow
+     queue names and the Cassandra connection settings.
+     """
+
+     AsyncProcessor.add_args(parser)
+
+     parser.add_argument(
+         '--config-request-queue',
+         default=default_config_request_queue,
+         help=f'Config request queue (default: {default_config_request_queue})'
+     )
+
+     parser.add_argument(
+         '--config-response-queue',
+         default=default_config_response_queue,
+         help=f'Config response queue {default_config_response_queue}',
+     )
+
+     # __init__ reads this setting as "config_push_queue", so the
+     # destination is named explicitly; the flag itself is unchanged.
+     # NOTE(review): assumes parsed arguments reach __init__ keyed by
+     # their argparse destination -- confirm against launch().
+     parser.add_argument(
+         '--push-queue',
+         dest='config_push_queue',
+         default=default_config_push_queue,
+         help=f'Config push queue (default: {default_config_push_queue})'
+     )
+
+     parser.add_argument(
+         '--flow-request-queue',
+         default=default_flow_request_queue,
+         help=f'Flow request queue (default: {default_flow_request_queue})'
+     )
+
+     parser.add_argument(
+         '--flow-response-queue',
+         default=default_flow_response_queue,
+         help=f'Flow response queue {default_flow_response_queue}',
+     )
+
+     parser.add_argument(
+         '--cassandra-host',
+         default="cassandra",
+         help=f'Cassandra host (default: cassandra)'
+     )
+
+     parser.add_argument(
+         '--cassandra-user',
+         default=None,
+         help=f'Cassandra user'
+     )
+
+     parser.add_argument(
+         '--cassandra-password',
+         default=None,
+         help=f'Cassandra password'
+     )
+
+def run():
+ """Entry point: call Processor.launch with default_ident and this module's docstring."""
+
+ Processor.launch(default_ident, __doc__)
+
diff --git a/trustgraph-flow/trustgraph/cores/__init__.py b/trustgraph-flow/trustgraph/cores/__init__.py
new file mode 100644
index 00000000..9843ccfb
--- /dev/null
+++ b/trustgraph-flow/trustgraph/cores/__init__.py
@@ -0,0 +1,3 @@
+
+from . service import run
+
diff --git a/trustgraph-flow/trustgraph/cores/__main__.py b/trustgraph-flow/trustgraph/cores/__main__.py
new file mode 100644
index 00000000..1729017e
--- /dev/null
+++ b/trustgraph-flow/trustgraph/cores/__main__.py
@@ -0,0 +1,5 @@
+
+from . service import run
+
+if __name__ == '__main__':
+ run()
diff --git a/trustgraph-flow/trustgraph/cores/knowledge.py b/trustgraph-flow/trustgraph/cores/knowledge.py
new file mode 100644
index 00000000..8c082601
--- /dev/null
+++ b/trustgraph-flow/trustgraph/cores/knowledge.py
@@ -0,0 +1,285 @@
+
+from .. schema import KnowledgeResponse, Error, Triples, GraphEmbeddings
+from .. knowledge import hash
+from .. exceptions import RequestError
+from .. tables.knowledge import KnowledgeTableStore
+from .. base import Publisher
+
+import base64
+import asyncio
+import uuid
+
+class KnowledgeManager:
+
+ def __init__(
+ self, cassandra_host, cassandra_user, cassandra_password,
+ keyspace, flow_config,
+ ):
+ """Create the table store and the (not yet started) loader state.
+
+ ``flow_config`` is kept for core_loader, which reads its ``flows``
+ and ``pulsar_client`` attributes.
+ """
+
+ self.table_store = KnowledgeTableStore(
+ cassandra_host, cassandra_user, cassandra_password, keyspace
+ )
+
+ # Pending (request, respond) pairs for core_loader; at most 20 queued.
+ self.loader_queue = asyncio.Queue(maxsize=20)
+ # Set by load_kg_core the first time a load is requested.
+ self.background_task = None
+ self.flow_config = flow_config
+
+ async def delete_kg_core(self, request, respond):
+     """Delete core ``request.id`` of ``request.user`` and send an empty acknowledgement."""
+
+     print("Deleting core...", flush=True)
+
+     user, core_id = request.user, request.id
+     await self.table_store.delete_kg_core(user, core_id)
+
+     ack = KnowledgeResponse(
+         error = None,
+         ids = None,
+         eos = False,
+         triples = None,
+         graph_embeddings = None,
+     )
+
+     await respond(ack)
+
+ async def get_kg_core(self, request, respond):
+ """Send the triples and graph embeddings of a core, then an end-of-stream marker.
+
+ Triples are requested first, graph embeddings second; every item is
+ sent as its own response with ``eos = False``.  A final response with
+ ``eos = True`` closes the exchange.
+ """
+
+ print("Get core...", flush=True)
+
+ # Callback handed to the table store: wraps one triples item in a response.
+ async def publish_triples(t):
+ await respond(
+ KnowledgeResponse(
+ error = None,
+ ids = None,
+ eos = False,
+ triples = t,
+ graph_embeddings = None,
+ )
+ )
+
+ # Fetch the core's triples; the store is given publish_triples as callback.
+ await self.table_store.get_triples(
+ request.user,
+ request.id,
+ publish_triples,
+ )
+
+ # Callback handed to the table store: wraps one embeddings item in a response.
+ async def publish_ge(g):
+ await respond(
+ KnowledgeResponse(
+ error = None,
+ ids = None,
+ eos = False,
+ triples = None,
+ graph_embeddings = g,
+ )
+ )
+
+ # Fetch the core's graph embeddings; the store is given publish_ge as callback.
+ await self.table_store.get_graph_embeddings(
+ request.user,
+ request.id,
+ publish_ge,
+ )
+
+ print("Get complete", flush=True)
+
+ # End-of-stream marker.
+ await respond(
+ KnowledgeResponse(
+ error = None,
+ ids = None,
+ eos = True,
+ triples = None,
+ graph_embeddings = None,
+ )
+ )
+
+ async def list_kg_cores(self, request, respond):
+     """Reply with the core IDs the table store lists for ``request.user``."""
+
+     core_ids = await self.table_store.list_kg_cores(request.user)
+
+     listing = KnowledgeResponse(
+         error = None,
+         ids = core_ids,
+         eos = False,
+         triples = None,
+         graph_embeddings = None
+     )
+
+     await respond(listing)
+
+ async def put_kg_core(self, request, respond):
+     """Store whichever of triples / graph embeddings the request carries, then acknowledge."""
+
+     triples = request.triples
+     if triples:
+         await self.table_store.add_triples(triples)
+
+     embeddings = request.graph_embeddings
+     if embeddings:
+         await self.table_store.add_graph_embeddings(embeddings)
+
+     ack = KnowledgeResponse(
+         error = None,
+         ids = None,
+         eos = False,
+         triples = None,
+         graph_embeddings = None
+     )
+
+     await respond(ack)
+
+ async def load_kg_core(self, request, respond):
+ """Queue a load request for core_loader, starting that task on first use."""
+
+ # The loader task is created lazily, once, on the first load request.
+ if self.background_task is None:
+ self.background_task = asyncio.create_task(
+ self.core_loader()
+ )
+ # Wait for it to start (yuck)
+# await asyncio.sleep(0.5)
+
+ # Waits here while the queue is full (maxsize is set in __init__).
+ await self.loader_queue.put((request, respond))
+
+ # Not sending a response, the loader thread can do that
+
+ async def unload_kg_core(self, request, respond):
+     """Reply with a "not-implemented" error; unloading is not supported."""
+
+     not_implemented = Error(
+         type = "not-implemented",
+         message = "Not implemented"
+     )
+
+     reply = KnowledgeResponse(
+         error = not_implemented,
+         ids = None,
+         eos = False,
+         triples = None,
+         graph_embeddings = None
+     )
+
+     await respond(reply)
+
+ async def core_loader(self):
+     """Background task: serve queued load requests one at a time, forever.
+
+     For each (request, respond) pair taken from ``loader_queue`` the
+     request is validated against ``flow_config.flows``.  On success an
+     empty acknowledgement is sent and the core's triples and graph
+     embeddings are published to the flow's "triples-store" and
+     "graph-embeddings-store" interfaces.  On failure a "load-error"
+     response is sent and the request is dropped.
+     """
+
+     print("Running...", flush=True)
+     while True:
+
+         print("Wait for next load...", flush=True)
+         request, respond = await self.loader_queue.get()
+
+         print("Loading...", request.id, flush=True)
+
+         try:
+
+             if request.id is None:
+                 raise RuntimeError("Core ID must be specified")
+
+             if request.flow is None:
+                 raise RuntimeError("Flow ID must be specified")
+
+             if request.flow not in self.flow_config.flows:
+                 raise RuntimeError("Invalid flow")
+
+             flow = self.flow_config.flows[request.flow]
+
+             if "interfaces" not in flow:
+                 raise RuntimeError("No defined interfaces")
+
+             if "triples-store" not in flow["interfaces"]:
+                 raise RuntimeError("Flow has no triples-store")
+
+             if "graph-embeddings-store" not in flow["interfaces"]:
+                 raise RuntimeError("Flow has no graph-embeddings-store")
+
+             t_q = flow["interfaces"]["triples-store"]
+             ge_q = flow["interfaces"]["graph-embeddings-store"]
+
+             # Got this far, it should all work
+             await respond(
+                 KnowledgeResponse(
+                     error = None,
+                     ids = None,
+                     eos = False,
+                     triples = None,
+                     graph_embeddings = None
+                 )
+             )
+
+         except Exception as e:
+
+             print("Exception:", e, flush=True)
+             await respond(
+                 KnowledgeResponse(
+                     error = Error(
+                         type = "load-error",
+                         message = str(e),
+                     ),
+                     ids = None,
+                     eos = False,
+                     triples = None,
+                     graph_embeddings = None
+                 )
+             )
+
+             # The request was rejected, so nothing may be published for
+             # it.  Without this, t_q / ge_q below would be unbound on a
+             # first failure, or left over from an earlier request, which
+             # would load this core into the wrong flow.
+             continue
+
+         print("Going to start loading...", flush=True)
+
+         # Initialised up front so that the finally clause can tell
+         # which publishers were actually created.
+         t_pub = None
+         ge_pub = None
+
+         try:
+
+             print(t_q, flush=True)
+             print(ge_q, flush=True)
+
+             t_pub = Publisher(
+                 self.flow_config.pulsar_client, t_q,
+                 schema=Triples,
+             )
+             ge_pub = Publisher(
+                 self.flow_config.pulsar_client, ge_q,
+                 schema=GraphEmbeddings
+             )
+
+             print("Start publishers...", flush=True)
+
+             await t_pub.start()
+             await ge_pub.start()
+
+             async def publish_triples(t):
+                 await t_pub.send(None, t)
+
+             print("Publish triples...", flush=True)
+
+             # The table store is given publish_triples as its callback.
+             await self.table_store.get_triples(
+                 request.user,
+                 request.id,
+                 publish_triples,
+             )
+
+             async def publish_ge(g):
+                 await ge_pub.send(None, g)
+
+             print("Publish GEs...", flush=True)
+
+             # The table store is given publish_ge as its callback.
+             await self.table_store.get_graph_embeddings(
+                 request.user,
+                 request.id,
+                 publish_ge,
+             )
+
+             print("Completed that.", flush=True)
+
+         except Exception as e:
+
+             # The acknowledgement has already been sent, so a failure
+             # during publishing can only be logged.
+             print("Exception:", e, flush=True)
+
+         finally:
+
+             print("Stopping publishers...", flush=True)
+
+             if t_pub: await t_pub.stop()
+             if ge_pub: await ge_pub.stop()
+
+         print("Done", flush=True)
diff --git a/trustgraph-flow/trustgraph/cores/service.py b/trustgraph-flow/trustgraph/cores/service.py
new file mode 100755
index 00000000..810d159d
--- /dev/null
+++ b/trustgraph-flow/trustgraph/cores/service.py
@@ -0,0 +1,230 @@
+
+"""
+Knowledge core service, manages cores and exports them
+"""
+
+from functools import partial
+import asyncio
+import base64
+import json
+
+from .. base import AsyncProcessor, Consumer, Producer, Publisher, Subscriber
+from .. base import ConsumerMetrics, ProducerMetrics
+
+from .. schema import KnowledgeRequest, KnowledgeResponse, Error
+from .. schema import knowledge_request_queue, knowledge_response_queue
+
+from .. schema import Document, Metadata
+from .. schema import TextDocument, Metadata
+
+from .. exceptions import RequestError
+
+from . knowledge import KnowledgeManager
+
+# Identity handed to Processor.launch() by run().
+default_ident = "knowledge"
+
+# Queue defaults, used when the matching parameter is not supplied.
+default_knowledge_request_queue = knowledge_request_queue
+default_knowledge_response_queue = knowledge_response_queue
+
+# Default Cassandra host; Processor.__init__ splits the value on ",".
+default_cassandra_host = "cassandra"
+
+# FIXME: How to ensure this doesn't conflict with other usage?
+# Keyspace name handed to KnowledgeManager by Processor.__init__.
+keyspace = "knowledge"
+
+class Processor(AsyncProcessor):
+
+ def __init__(self, **params):
+ """Wire up the knowledge core service.
+
+ Reads queue names and Cassandra settings from ``params`` (falling back
+ to the module defaults), initialises the base processor, creates the
+ request consumer and response producer, builds the KnowledgeManager
+ and registers on_knowledge_config as a config handler.
+ """
+
+ id = params.get("id")
+
+ knowledge_request_queue = params.get(
+ "knowledge_request_queue", default_knowledge_request_queue
+ )
+
+ knowledge_response_queue = params.get(
+ "knowledge_response_queue", default_knowledge_response_queue
+ )
+
+ cassandra_host = params.get("cassandra_host", default_cassandra_host)
+ cassandra_user = params.get("cassandra_user")
+ cassandra_password = params.get("cassandra_password")
+
+ # cassandra_password is not included in what is passed to the base class.
+ super(Processor, self).__init__(
+ **params | {
+ "knowledge_request_queue": knowledge_request_queue,
+ "knowledge_response_queue": knowledge_response_queue,
+ "cassandra_host": cassandra_host,
+ "cassandra_user": cassandra_user,
+ }
+ )
+
+ knowledge_request_metrics = ConsumerMetrics(
+ processor = self.id, flow = None, name = "knowledge-request"
+ )
+
+ knowledge_response_metrics = ProducerMetrics(
+ processor = self.id, flow = None, name = "knowledge-response"
+ )
+
+ self.knowledge_request_consumer = Consumer(
+ taskgroup = self.taskgroup,
+ client = self.pulsar_client,
+ flow = None,
+ topic = knowledge_request_queue,
+ subscriber = id,
+ schema = KnowledgeRequest,
+ handler = self.on_knowledge_request,
+ metrics = knowledge_request_metrics,
+ )
+
+ self.knowledge_response_producer = Producer(
+ client = self.pulsar_client,
+ topic = knowledge_response_queue,
+ schema = KnowledgeResponse,
+ metrics = knowledge_response_metrics,
+ )
+
+ # The manager receives this processor as flow_config; it reads
+ # self.flows and self.pulsar_client from it.
+ self.knowledge = KnowledgeManager(
+ cassandra_host = cassandra_host.split(","),
+ cassandra_user = cassandra_user,
+ cassandra_password = cassandra_password,
+ keyspace = keyspace,
+ flow_config = self,
+ )
+
+ self.register_config_handler(self.on_knowledge_config)
+
+ # Flow ID -> decoded flow definition; replaced by on_knowledge_config.
+ self.flows = {}
+
+ print("Initialised.", flush=True)
+
+ async def start(self):
+ """Start the base processor, then the request consumer and the response producer."""
+
+ await super(Processor, self).start()
+ await self.knowledge_request_consumer.start()
+ await self.knowledge_response_producer.start()
+
+ async def on_knowledge_config(self, config, version):
+ """Config handler: refresh ``self.flows`` from the "flows" section of ``config``."""
+
+ print("config version", version)
+
+ if "flows" in config:
+
+ # Each stored flow definition is JSON text; decode it once here.
+ self.flows = {
+ k: json.loads(v)
+ for k, v in config["flows"].items()
+ }
+
+ print(self.flows)
+
+ async def process_request(self, v, id):
+     """Run the KnowledgeManager operation named by ``v.operation``.
+
+     The implementation is given the request and a ``respond`` coroutine
+     that sends a response tagged with ``id``.  Raises RequestError when
+     the operation is None or not recognised.
+     """
+
+     if v.operation is None:
+         raise RequestError("Null operation")
+
+     print("request", v.operation)
+
+     manager = self.knowledge
+
+     # Operation name -> implementing coroutine.
+     impls = {
+         "list-kg-cores": manager.list_kg_cores,
+         "get-kg-core": manager.get_kg_core,
+         "delete-kg-core": manager.delete_kg_core,
+         "put-kg-core": manager.put_kg_core,
+         "load-kg-core": manager.load_kg_core,
+         "unload-kg-core": manager.unload_kg_core,
+     }
+
+     impl = impls.get(v.operation)
+
+     if impl is None:
+         raise RequestError(f"Invalid operation: {v.operation}")
+
+     async def respond(x):
+         await self.knowledge_response_producer.send(
+             x, { "id": id }
+         )
+
+     return await impl(v, respond)
+
+ async def on_knowledge_request(self, msg, consumer, flow):
+     """Serve one knowledge request.
+
+     On success nothing is sent from here; the operation sends whatever
+     responses it needs.  A RequestError is reported to the sender as
+     "request-error", any other exception as "unexpected-error".
+     """
+
+     v = msg.value()
+
+     # Sender-produced ID
+     id = msg.properties()["id"]
+
+     print(f"Handling input {id}...", flush=True)
+
+     try:
+
+         # We don't send a response back here, the processing
+         # implementation sends whatever it needs to send.
+         await self.process_request(v, id)
+
+         return
+
+     except RequestError as e:
+         error_type, error_message = "request-error", str(e)
+
+     except Exception as e:
+         error_type, error_message = "unexpected-error", str(e)
+
+     # Only the failure paths reach this point.
+     resp = KnowledgeResponse(
+         error = Error(
+             type = error_type,
+             message = error_message,
+         )
+     )
+
+     await self.knowledge_response_producer.send(
+         resp, properties={"id": id}
+     )
+
+ @staticmethod
+ def add_args(parser):
+     """Register the knowledge service's command-line options on ``parser``.
+
+     Adds the AsyncProcessor options first, then the knowledge queue
+     names and the Cassandra connection settings.
+     """
+
+     AsyncProcessor.add_args(parser)
+
+     parser.add_argument(
+         '--knowledge-request-queue',
+         default=default_knowledge_request_queue,
+         help=f'Knowledge request queue (default: {default_knowledge_request_queue})'
+     )
+
+     parser.add_argument(
+         '--knowledge-response-queue',
+         default=default_knowledge_response_queue,
+         help=f'Knowledge response queue {default_knowledge_response_queue}',
+     )
+
+     parser.add_argument(
+         '--cassandra-host',
+         default="cassandra",
+         help=f'Cassandra host (default: cassandra)'
+     )
+
+     parser.add_argument(
+         '--cassandra-user',
+         default=None,
+         help=f'Cassandra user'
+     )
+
+     parser.add_argument(
+         '--cassandra-password',
+         default=None,
+         help=f'Cassandra password'
+     )
+
+def run():
+ """Entry point: call Processor.launch with default_ident and this module's docstring."""
+
+ Processor.launch(default_ident, __doc__)
+
diff --git a/trustgraph-flow/trustgraph/decoding/mistral_ocr/processor.py b/trustgraph-flow/trustgraph/decoding/mistral_ocr/processor.py
index f5100244..e42d1601 100755
--- a/trustgraph-flow/trustgraph/decoding/mistral_ocr/processor.py
+++ b/trustgraph-flow/trustgraph/decoding/mistral_ocr/processor.py
@@ -17,12 +17,10 @@ from mistralai.models import OCRResponse
from ... schema import Document, TextDocument, Metadata
from ... schema import document_ingest_queue, text_ingest_queue
from ... log_level import LogLevel
-from ... base import ConsumerProducer
+from ... base import InputOutputProcessor
-module = ".".join(__name__.split(".")[1:-1])
+module = "ocr"
-default_input_queue = document_ingest_queue
-default_output_queue = text_ingest_queue
default_subscriber = module
default_api_key = os.getenv("MISTRAL_TOKEN")
@@ -71,19 +69,17 @@ def get_combined_markdown(ocr_response: OCRResponse) -> str:
return "\n\n".join(markdowns)
-class Processor(ConsumerProducer):
+class Processor(InputOutputProcessor):
def __init__(self, **params):
- input_queue = params.get("input_queue", default_input_queue)
- output_queue = params.get("output_queue", default_output_queue)
+ id = params.get("id")
subscriber = params.get("subscriber", default_subscriber)
api_key = params.get("api_key", default_api_key)
super(Processor, self).__init__(
**params | {
- "input_queue": input_queue,
- "output_queue": output_queue,
+ "id": id,
"subscriber": subscriber,
"input_schema": Document,
"output_schema": TextDocument,
@@ -151,7 +147,7 @@ class Processor(ConsumerProducer):
return markdown
- async def handle(self, msg):
+ async def on_message(self, msg, consumer):
print("PDF message received")
@@ -166,17 +162,14 @@ class Processor(ConsumerProducer):
text=markdown.encode("utf-8"),
)
- await self.send(r)
+ await consumer.q.output.send(r)
print("Done.", flush=True)
@staticmethod
def add_args(parser):
- ConsumerProducer.add_args(
- parser, default_input_queue, default_subscriber,
- default_output_queue,
- )
+ InputOutputProcessor.add_args(parser, default_subscriber)
parser.add_argument(
'-k', '--api-key',
diff --git a/trustgraph-flow/trustgraph/decoding/pdf/pdf_decoder.py b/trustgraph-flow/trustgraph/decoding/pdf/pdf_decoder.py
index 5e5e3612..3f836832 100755
--- a/trustgraph-flow/trustgraph/decoding/pdf/pdf_decoder.py
+++ b/trustgraph-flow/trustgraph/decoding/pdf/pdf_decoder.py
@@ -9,39 +9,42 @@ import base64
from langchain_community.document_loaders import PyPDFLoader
from ... schema import Document, TextDocument, Metadata
-from ... schema import document_ingest_queue, text_ingest_queue
-from ... log_level import LogLevel
-from ... base import ConsumerProducer
+from ... base import FlowProcessor, ConsumerSpec, ProducerSpec
-module = ".".join(__name__.split(".")[1:-1])
+default_ident = "pdf-decoder"
-default_input_queue = document_ingest_queue
-default_output_queue = text_ingest_queue
-default_subscriber = module
-
-class Processor(ConsumerProducer):
+class Processor(FlowProcessor):
def __init__(self, **params):
- input_queue = params.get("input_queue", default_input_queue)
- output_queue = params.get("output_queue", default_output_queue)
- subscriber = params.get("subscriber", default_subscriber)
+ id = params.get("id", default_ident)
super(Processor, self).__init__(
**params | {
- "input_queue": input_queue,
- "output_queue": output_queue,
- "subscriber": subscriber,
- "input_schema": Document,
- "output_schema": TextDocument,
+ "id": id,
}
)
- print("PDF inited")
+ self.register_specification(
+ ConsumerSpec(
+ name = "input",
+ schema = Document,
+ handler = self.on_message,
+ )
+ )
- async def handle(self, msg):
+ self.register_specification(
+ ProducerSpec(
+ name = "output",
+ schema = TextDocument,
+ )
+ )
- print("PDF message received")
+ print("PDF inited", flush=True)
+
+ async def on_message(self, msg, consumer, flow):
+
+ print("PDF message received", flush=True)
v = msg.value()
@@ -59,24 +62,22 @@ class Processor(ConsumerProducer):
for ix, page in enumerate(pages):
+ print("page", ix, flush=True)
+
r = TextDocument(
metadata=v.metadata,
text=page.page_content.encode("utf-8"),
)
- await self.send(r)
+ await flow("output").send(r)
print("Done.", flush=True)
@staticmethod
def add_args(parser):
-
- ConsumerProducer.add_args(
- parser, default_input_queue, default_subscriber,
- default_output_queue,
- )
+ FlowProcessor.add_args(parser)
def run():
- Processor.launch(module, __doc__)
+ Processor.launch(default_ident, __doc__)
diff --git a/trustgraph-flow/trustgraph/document_rag.py b/trustgraph-flow/trustgraph/document_rag.py
deleted file mode 100644
index 4fc4850a..00000000
--- a/trustgraph-flow/trustgraph/document_rag.py
+++ /dev/null
@@ -1,153 +0,0 @@
-
-from . clients.document_embeddings_client import DocumentEmbeddingsClient
-from . clients.triples_query_client import TriplesQueryClient
-from . clients.embeddings_client import EmbeddingsClient
-from . clients.prompt_client import PromptClient
-
-from . schema import DocumentEmbeddingsRequest, DocumentEmbeddingsResponse
-from . schema import TriplesQueryRequest, TriplesQueryResponse
-from . schema import prompt_request_queue
-from . schema import prompt_response_queue
-from . schema import embeddings_request_queue
-from . schema import embeddings_response_queue
-from . schema import document_embeddings_request_queue
-from . schema import document_embeddings_response_queue
-
-LABEL="http://www.w3.org/2000/01/rdf-schema#label"
-DEFINITION="http://www.w3.org/2004/02/skos/core#definition"
-
-class Query:
-
- def __init__(
- self, rag, user, collection, verbose,
- doc_limit=20
- ):
- self.rag = rag
- self.user = user
- self.collection = collection
- self.verbose = verbose
- self.doc_limit = doc_limit
-
- def get_vector(self, query):
-
- if self.verbose:
- print("Compute embeddings...", flush=True)
-
- qembeds = self.rag.embeddings.request(query)
-
- if self.verbose:
- print("Done.", flush=True)
-
- return qembeds
-
- def get_docs(self, query):
-
- vectors = self.get_vector(query)
-
- if self.verbose:
- print("Get entities...", flush=True)
-
- docs = self.rag.de_client.request(
- vectors, limit=self.doc_limit
- )
-
- if self.verbose:
- print("Docs:", flush=True)
- for doc in docs:
- print(doc, flush=True)
-
- return docs
-
-class DocumentRag:
-
- def __init__(
- self,
- pulsar_host="pulsar://pulsar:6650",
- pulsar_api_key=None,
- pr_request_queue=None,
- pr_response_queue=None,
- emb_request_queue=None,
- emb_response_queue=None,
- de_request_queue=None,
- de_response_queue=None,
- verbose=False,
- module="test",
- ):
-
- self.verbose=verbose
-
- if pr_request_queue is None:
- pr_request_queue = prompt_request_queue
-
- if pr_response_queue is None:
- pr_response_queue = prompt_response_queue
-
- if emb_request_queue is None:
- emb_request_queue = embeddings_request_queue
-
- if emb_response_queue is None:
- emb_response_queue = embeddings_response_queue
-
- if de_request_queue is None:
- de_request_queue = document_embeddings_request_queue
-
- if de_response_queue is None:
- de_response_queue = document_embeddings_response_queue
-
- if self.verbose:
- print("Initialising...", flush=True)
-
- self.de_client = DocumentEmbeddingsClient(
- pulsar_host=pulsar_host,
- subscriber=module + "-de",
- input_queue=de_request_queue,
- output_queue=de_response_queue,
- pulsar_api_key=pulsar_api_key,
- )
-
- self.embeddings = EmbeddingsClient(
- pulsar_host=pulsar_host,
- input_queue=emb_request_queue,
- output_queue=emb_response_queue,
- subscriber=module + "-emb",
- pulsar_api_key=pulsar_api_key,
- )
-
- self.lang = PromptClient(
- pulsar_host=pulsar_host,
- input_queue=pr_request_queue,
- output_queue=pr_response_queue,
- subscriber=module + "-de-prompt",
- pulsar_api_key=pulsar_api_key,
- )
-
- if self.verbose:
- print("Initialised", flush=True)
-
- def query(
- self, query, user="trustgraph", collection="default",
- doc_limit=20,
- ):
-
- if self.verbose:
- print("Construct prompt...", flush=True)
-
- q = Query(
- rag=self, user=user, collection=collection, verbose=self.verbose,
- doc_limit=doc_limit
- )
-
- docs = q.get_docs(query)
-
- if self.verbose:
- print("Invoke LLM...", flush=True)
- print(docs)
- print(query)
-
- resp = self.lang.request_document_prompt(query, docs)
-
- if self.verbose:
- print("Done", flush=True)
-
- return resp
-
diff --git a/trustgraph-flow/trustgraph/embeddings/document_embeddings/embeddings.py b/trustgraph-flow/trustgraph/embeddings/document_embeddings/embeddings.py
index 70f53e07..95e5462d 100755
--- a/trustgraph-flow/trustgraph/embeddings/document_embeddings/embeddings.py
+++ b/trustgraph-flow/trustgraph/embeddings/document_embeddings/embeddings.py
@@ -6,61 +6,63 @@ Output is chunk plus embedding.
"""
from ... schema import Chunk, ChunkEmbeddings, DocumentEmbeddings
-from ... schema import chunk_ingest_queue
-from ... schema import document_embeddings_store_queue
-from ... schema import embeddings_request_queue, embeddings_response_queue
-from ... clients.embeddings_client import EmbeddingsClient
-from ... log_level import LogLevel
-from ... base import ConsumerProducer
+from ... schema import EmbeddingsRequest, EmbeddingsResponse
-module = ".".join(__name__.split(".")[1:-1])
+from ... base import FlowProcessor, RequestResponseSpec, ConsumerSpec
+from ... base import ProducerSpec
-default_input_queue = chunk_ingest_queue
-default_output_queue = document_embeddings_store_queue
-default_subscriber = module
+default_ident = "document-embeddings"
-class Processor(ConsumerProducer):
+class Processor(FlowProcessor):
def __init__(self, **params):
- input_queue = params.get("input_queue", default_input_queue)
- output_queue = params.get("output_queue", default_output_queue)
- subscriber = params.get("subscriber", default_subscriber)
- emb_request_queue = params.get(
- "embeddings_request_queue", embeddings_request_queue
- )
- emb_response_queue = params.get(
- "embeddings_response_queue", embeddings_response_queue
- )
+ id = params.get("id")
super(Processor, self).__init__(
**params | {
- "input_queue": input_queue,
- "output_queue": output_queue,
- "embeddings_request_queue": emb_request_queue,
- "embeddings_response_queue": emb_response_queue,
- "subscriber": subscriber,
- "input_schema": Chunk,
- "output_schema": DocumentEmbeddings,
+ "id": id,
}
)
- self.embeddings = EmbeddingsClient(
- pulsar_host=self.pulsar_host,
- pulsar_api_key=self.pulsar_api_key,
- input_queue=emb_request_queue,
- output_queue=emb_response_queue,
- subscriber=module + "-emb",
+ self.register_specification(
+ ConsumerSpec(
+ name = "input",
+ schema = Chunk,
+ handler = self.on_message,
+ )
)
- async def handle(self, msg):
+ self.register_specification(
+ RequestResponseSpec(
+ request_name = "embeddings-request",
+ request_schema = EmbeddingsRequest,
+ response_name = "embeddings-response",
+ response_schema = EmbeddingsResponse,
+ )
+ )
+
+ self.register_specification(
+ ProducerSpec(
+ name = "output",
+ schema = DocumentEmbeddings
+ )
+ )
+
+ async def on_message(self, msg, consumer, flow):
v = msg.value()
print(f"Indexing {v.metadata.id}...", flush=True)
try:
- vectors = self.embeddings.request(v.chunk)
+ resp = await flow("embeddings-request").request(
+ EmbeddingsRequest(
+ text = v.chunk
+ )
+ )
+
+ vectors = resp.vectors
embeds = [
ChunkEmbeddings(
@@ -74,7 +76,7 @@ class Processor(ConsumerProducer):
chunks=embeds,
)
- await self.send(r)
+ await flow("output").send(r)
except Exception as e:
print("Exception:", e, flush=True)
@@ -87,24 +89,9 @@ class Processor(ConsumerProducer):
@staticmethod
def add_args(parser):
- ConsumerProducer.add_args(
- parser, default_input_queue, default_subscriber,
- default_output_queue,
- )
-
- parser.add_argument(
- '--embeddings-request-queue',
- default=embeddings_request_queue,
- help=f'Embeddings request queue (default: {embeddings_request_queue})',
- )
-
- parser.add_argument(
- '--embeddings-response-queue',
- default=embeddings_response_queue,
- help=f'Embeddings request queue (default: {embeddings_response_queue})',
- )
+ FlowProcessor.add_args(parser)
def run():
- Processor.launch(module, __doc__)
+ Processor.launch(default_ident, __doc__)
diff --git a/trustgraph-flow/trustgraph/embeddings/fastembed/processor.py b/trustgraph-flow/trustgraph/embeddings/fastembed/processor.py
index bc164fa0..a4ae35dc 100755
--- a/trustgraph-flow/trustgraph/embeddings/fastembed/processor.py
+++ b/trustgraph-flow/trustgraph/embeddings/fastembed/processor.py
@@ -1,81 +1,43 @@
"""
-Embeddings service, applies an embeddings model selected from HuggingFace.
+Embeddings service, applies an embeddings model using fastembed
Input is text, output is embeddings vector.
"""
-from ... schema import EmbeddingsRequest, EmbeddingsResponse
-from ... schema import embeddings_request_queue, embeddings_response_queue
-from ... log_level import LogLevel
-from ... base import ConsumerProducer
+from ... base import EmbeddingsService
+
from fastembed import TextEmbedding
-import os
-module = ".".join(__name__.split(".")[1:-1])
+default_ident = "embeddings"
-default_input_queue = embeddings_request_queue
-default_output_queue = embeddings_response_queue
-default_subscriber = module
default_model="sentence-transformers/all-MiniLM-L6-v2"
-class Processor(ConsumerProducer):
+class Processor(EmbeddingsService):
def __init__(self, **params):
- input_queue = params.get("input_queue", default_input_queue)
- output_queue = params.get("output_queue", default_output_queue)
- subscriber = params.get("subscriber", default_subscriber)
-
model = params.get("model", default_model)
super(Processor, self).__init__(
- **params | {
- "input_queue": input_queue,
- "output_queue": output_queue,
- "subscriber": subscriber,
- "input_schema": EmbeddingsRequest,
- "output_schema": EmbeddingsResponse,
- "model": model,
- }
+ **params | { "model": model }
)
+ print("Get model...", flush=True)
self.embeddings = TextEmbedding(model_name = model)
- async def handle(self, msg):
+ async def on_embeddings(self, text):
- v = msg.value()
-
- # Sender-produced ID
-
- id = msg.properties()["id"]
-
- print(f"Handling input {id}...", flush=True)
-
- text = v.text
vecs = self.embeddings.embed([text])
- vecs = [
+ return [
v.tolist()
for v in vecs
]
- print("Send response...", flush=True)
- r = EmbeddingsResponse(
- vectors=list(vecs),
- error=None,
- )
-
- await self.send(r, properties={"id": id})
-
- print("Done.", flush=True)
-
@staticmethod
def add_args(parser):
- ConsumerProducer.add_args(
- parser, default_input_queue, default_subscriber,
- default_output_queue,
- )
+ EmbeddingsService.add_args(parser)
parser.add_argument(
'-m', '--model',
@@ -85,5 +47,5 @@ class Processor(ConsumerProducer):
def run():
- Processor.launch(module, __doc__)
+ Processor.launch(default_ident, __doc__)
diff --git a/trustgraph-flow/trustgraph/embeddings/graph_embeddings/embeddings.py b/trustgraph-flow/trustgraph/embeddings/graph_embeddings/embeddings.py
index 2cbe9907..043be3a7 100755
--- a/trustgraph-flow/trustgraph/embeddings/graph_embeddings/embeddings.py
+++ b/trustgraph-flow/trustgraph/embeddings/graph_embeddings/embeddings.py
@@ -6,53 +6,48 @@ Output is entity plus embedding.
"""
from ... schema import EntityContexts, EntityEmbeddings, GraphEmbeddings
-from ... schema import entity_contexts_ingest_queue
-from ... schema import graph_embeddings_store_queue
-from ... schema import embeddings_request_queue, embeddings_response_queue
-from ... clients.embeddings_client import EmbeddingsClient
-from ... log_level import LogLevel
-from ... base import ConsumerProducer
+from ... schema import EmbeddingsRequest, EmbeddingsResponse
-module = ".".join(__name__.split(".")[1:-1])
+from ... base import FlowProcessor, EmbeddingsClientSpec, ConsumerSpec
+from ... base import ProducerSpec
-default_input_queue = entity_contexts_ingest_queue
-default_output_queue = graph_embeddings_store_queue
-default_subscriber = module
+default_ident = "graph-embeddings"
-class Processor(ConsumerProducer):
+class Processor(FlowProcessor):
def __init__(self, **params):
- input_queue = params.get("input_queue", default_input_queue)
- output_queue = params.get("output_queue", default_output_queue)
- subscriber = params.get("subscriber", default_subscriber)
- emb_request_queue = params.get(
- "embeddings_request_queue", embeddings_request_queue
- )
- emb_response_queue = params.get(
- "embeddings_response_queue", embeddings_response_queue
- )
+ id = params.get("id")
super(Processor, self).__init__(
**params | {
- "input_queue": input_queue,
- "output_queue": output_queue,
- "embeddings_request_queue": emb_request_queue,
- "embeddings_response_queue": emb_response_queue,
- "subscriber": subscriber,
- "input_schema": EntityContexts,
- "output_schema": GraphEmbeddings,
+ "id": id,
}
)
- self.embeddings = EmbeddingsClient(
- pulsar_host=self.pulsar_host,
- input_queue=emb_request_queue,
- output_queue=emb_response_queue,
- subscriber=module + "-emb",
+ self.register_specification(
+ ConsumerSpec(
+ name = "input",
+ schema = EntityContexts,
+ handler = self.on_message,
+ )
)
- async def handle(self, msg):
+ self.register_specification(
+ EmbeddingsClientSpec(
+ request_name = "embeddings-request",
+ response_name = "embeddings-response",
+ )
+ )
+
+ self.register_specification(
+ ProducerSpec(
+ name = "output",
+ schema = GraphEmbeddings
+ )
+ )
+
+ async def on_message(self, msg, consumer, flow):
v = msg.value()
print(f"Indexing {v.metadata.id}...", flush=True)
@@ -63,7 +58,9 @@ class Processor(ConsumerProducer):
for entity in v.entities:
- vectors = self.embeddings.request(entity.context)
+ vectors = await flow("embeddings-request").embed(
+ text = entity.context
+ )
entities.append(
EntityEmbeddings(
@@ -77,7 +74,7 @@ class Processor(ConsumerProducer):
entities=entities,
)
- await self.send(r)
+ await flow("output").send(r)
except Exception as e:
print("Exception:", e, flush=True)
@@ -90,24 +87,9 @@ class Processor(ConsumerProducer):
@staticmethod
def add_args(parser):
- ConsumerProducer.add_args(
- parser, default_input_queue, default_subscriber,
- default_output_queue,
- )
-
- parser.add_argument(
- '--embeddings-request-queue',
- default=embeddings_request_queue,
- help=f'Embeddings request queue (default: {embeddings_request_queue})',
- )
-
- parser.add_argument(
- '--embeddings-response-queue',
- default=embeddings_response_queue,
- help=f'Embeddings request queue (default: {embeddings_response_queue})',
- )
+ FlowProcessor.add_args(parser)
def run():
- Processor.launch(module, __doc__)
+ Processor.launch(default_ident, __doc__)
diff --git a/trustgraph-flow/trustgraph/embeddings/ollama/processor.py b/trustgraph-flow/trustgraph/embeddings/ollama/processor.py
index c441b9c6..86787316 100755
--- a/trustgraph-flow/trustgraph/embeddings/ollama/processor.py
+++ b/trustgraph-flow/trustgraph/embeddings/ollama/processor.py
@@ -11,7 +11,7 @@ from ... base import ConsumerProducer
from ollama import Client
import os
-module = ".".join(__name__.split(".")[1:-1])
+module = "embeddings"
default_input_queue = embeddings_request_queue
default_output_queue = embeddings_response_queue
diff --git a/trustgraph-flow/trustgraph/external/wikipedia/service.py b/trustgraph-flow/trustgraph/external/wikipedia/service.py
index cc002765..f7de78da 100644
--- a/trustgraph-flow/trustgraph/external/wikipedia/service.py
+++ b/trustgraph-flow/trustgraph/external/wikipedia/service.py
@@ -11,7 +11,7 @@ from trustgraph.log_level import LogLevel
from trustgraph.base import ConsumerProducer
import requests
-module = ".".join(__name__.split(".")[1:-1])
+module = "wikipedia"
default_input_queue = encyclopedia_lookup_request_queue
default_output_queue = encyclopedia_lookup_response_queue
diff --git a/trustgraph-flow/trustgraph/extract/kg/definitions/extract.py b/trustgraph-flow/trustgraph/extract/kg/definitions/extract.py
index 47c99802..f95dadf9 100755
--- a/trustgraph-flow/trustgraph/extract/kg/definitions/extract.py
+++ b/trustgraph-flow/trustgraph/extract/kg/definitions/extract.py
@@ -5,84 +5,62 @@ get entity definitions which are output as graph edges along with
entity/context definitions for embedding.
"""
+import json
import urllib.parse
-from pulsar.schema import JsonSchema
from .... schema import Chunk, Triple, Triples, Metadata, Value
from .... schema import EntityContext, EntityContexts
-from .... schema import chunk_ingest_queue, triples_store_queue
-from .... schema import entity_contexts_ingest_queue
-from .... schema import prompt_request_queue
-from .... schema import prompt_response_queue
-from .... log_level import LogLevel
-from .... clients.prompt_client import PromptClient
+from .... schema import PromptRequest, PromptResponse
from .... rdf import TRUSTGRAPH_ENTITIES, DEFINITION, RDF_LABEL, SUBJECT_OF
-from .... base import ConsumerProducer
+
+from .... base import FlowProcessor, ConsumerSpec, ProducerSpec
+from .... base import PromptClientSpec
DEFINITION_VALUE = Value(value=DEFINITION, is_uri=True)
RDF_LABEL_VALUE = Value(value=RDF_LABEL, is_uri=True)
SUBJECT_OF_VALUE = Value(value=SUBJECT_OF, is_uri=True)
-module = ".".join(__name__.split(".")[1:-1])
+default_ident = "kg-extract-definitions"
-default_input_queue = chunk_ingest_queue
-default_output_queue = triples_store_queue
-default_entity_context_queue = entity_contexts_ingest_queue
-default_subscriber = module
-
-class Processor(ConsumerProducer):
+class Processor(FlowProcessor):
def __init__(self, **params):
- input_queue = params.get("input_queue", default_input_queue)
- output_queue = params.get("output_queue", default_output_queue)
- ec_queue = params.get(
- "entity_context_queue",
- default_entity_context_queue
- )
- subscriber = params.get("subscriber", default_subscriber)
- pr_request_queue = params.get(
- "prompt_request_queue", prompt_request_queue
- )
- pr_response_queue = params.get(
- "prompt_response_queue", prompt_response_queue
- )
+ id = params.get("id")
super(Processor, self).__init__(
**params | {
- "input_queue": input_queue,
- "output_queue": output_queue,
- "subscriber": subscriber,
- "input_schema": Chunk,
- "output_schema": Triples,
- "prompt_request_queue": pr_request_queue,
- "prompt_response_queue": pr_response_queue,
+ "id": id,
}
)
- self.ec_prod = self.client.create_producer(
- topic=ec_queue,
- schema=JsonSchema(EntityContexts),
+ self.register_specification(
+ ConsumerSpec(
+ name = "input",
+ schema = Chunk,
+ handler = self.on_message
+ )
)
- __class__.pubsub_metric.info({
- "input_queue": input_queue,
- "output_queue": output_queue,
- "entity_context_queue": ec_queue,
- "prompt_request_queue": pr_request_queue,
- "prompt_response_queue": pr_response_queue,
- "subscriber": subscriber,
- "input_schema": Chunk.__name__,
- "output_schema": Triples.__name__,
- "vector_schema": EntityContexts.__name__,
- })
+ self.register_specification(
+ PromptClientSpec(
+ request_name = "prompt-request",
+ response_name = "prompt-response",
+ )
+ )
- self.prompt = PromptClient(
- pulsar_host=self.pulsar_host,
- pulsar_api_key=self.pulsar_api_key,
- input_queue=pr_request_queue,
- output_queue=pr_response_queue,
- subscriber = module + "-prompt",
+ self.register_specification(
+ ProducerSpec(
+ name = "triples",
+ schema = Triples
+ )
+ )
+
+ self.register_specification(
+ ProducerSpec(
+ name = "entity-contexts",
+ schema = EntityContexts
+ )
)
def to_uri(self, text):
@@ -93,36 +71,47 @@ class Processor(ConsumerProducer):
return uri
- def get_definitions(self, chunk):
-
- return self.prompt.request_definitions(chunk)
-
- async def emit_edges(self, metadata, triples):
+ async def emit_triples(self, pub, metadata, triples):
t = Triples(
metadata=metadata,
triples=triples,
)
- await self.send(t)
+ await pub.send(t)
- async def emit_ecs(self, metadata, entities):
+ async def emit_ecs(self, pub, metadata, entities):
t = EntityContexts(
metadata=metadata,
entities=entities,
)
- self.ec_prod.send(t)
+ await pub.send(t)
- async def handle(self, msg):
+ async def on_message(self, msg, consumer, flow):
v = msg.value()
print(f"Indexing {v.metadata.id}...", flush=True)
chunk = v.chunk.decode("utf-8")
+ print(chunk, flush=True)
+
try:
- defs = self.get_definitions(chunk)
+ try:
+
+ defs = await flow("prompt-request").extract_definitions(
+ text = chunk
+ )
+
+ print("Response", defs, flush=True)
+
+ if type(defs) != list:
+ raise RuntimeError("Expecting array in prompt response")
+
+ except Exception as e:
+ print("Prompt exception:", e, flush=True)
+ raise e
triples = []
entities = []
@@ -134,8 +123,8 @@ class Processor(ConsumerProducer):
for defn in defs:
- s = defn.name
- o = defn.definition
+ s = defn["entity"]
+ o = defn["definition"]
if s == "": continue
if o == "": continue
@@ -166,13 +155,13 @@ class Processor(ConsumerProducer):
ec = EntityContext(
entity=s_value,
- context=defn.definition,
+ context=defn["definition"],
)
entities.append(ec)
-
- await self.emit_edges(
+ await self.emit_triples(
+ flow("triples"),
Metadata(
id=v.metadata.id,
metadata=[],
@@ -183,6 +172,7 @@ class Processor(ConsumerProducer):
)
await self.emit_ecs(
+ flow("entity-contexts"),
Metadata(
id=v.metadata.id,
metadata=[],
@@ -200,30 +190,9 @@ class Processor(ConsumerProducer):
@staticmethod
def add_args(parser):
- ConsumerProducer.add_args(
- parser, default_input_queue, default_subscriber,
- default_output_queue,
- )
-
- parser.add_argument(
- '-e', '--entity-context-queue',
- default=default_entity_context_queue,
- help=f'Entity context queue (default: {default_entity_context_queue})'
- )
-
- parser.add_argument(
- '--prompt-request-queue',
- default=prompt_request_queue,
- help=f'Prompt request queue (default: {prompt_request_queue})',
- )
-
- parser.add_argument(
- '--prompt-completion-response-queue',
- default=prompt_response_queue,
- help=f'Prompt response queue (default: {prompt_response_queue})',
- )
+ FlowProcessor.add_args(parser)
def run():
- Processor.launch(module, __doc__)
+ Processor.launch(default_ident, __doc__)
diff --git a/trustgraph-flow/trustgraph/extract/kg/relationships/extract.py b/trustgraph-flow/trustgraph/extract/kg/relationships/extract.py
index 2f293527..63670a7d 100755
--- a/trustgraph-flow/trustgraph/extract/kg/relationships/extract.py
+++ b/trustgraph-flow/trustgraph/extract/kg/relationships/extract.py
@@ -5,59 +5,54 @@ relationship analysis to get entity relationship edges which are output as
graph edges.
"""
+import json
import urllib.parse
from .... schema import Chunk, Triple, Triples
from .... schema import Metadata, Value
-from .... schema import chunk_ingest_queue, triples_store_queue
-from .... schema import prompt_request_queue
-from .... schema import prompt_response_queue
-from .... log_level import LogLevel
-from .... clients.prompt_client import PromptClient
+from .... schema import PromptRequest, PromptResponse
from .... rdf import RDF_LABEL, TRUSTGRAPH_ENTITIES, SUBJECT_OF
-from .... base import ConsumerProducer
+
+from .... base import FlowProcessor, ConsumerSpec, ProducerSpec
+from .... base import PromptClientSpec
RDF_LABEL_VALUE = Value(value=RDF_LABEL, is_uri=True)
SUBJECT_OF_VALUE = Value(value=SUBJECT_OF, is_uri=True)
-module = ".".join(__name__.split(".")[1:-1])
+default_ident = "kg-extract-relationships"
-default_input_queue = chunk_ingest_queue
-default_output_queue = triples_store_queue
-default_subscriber = module
-
-class Processor(ConsumerProducer):
+class Processor(FlowProcessor):
def __init__(self, **params):
- input_queue = params.get("input_queue", default_input_queue)
- output_queue = params.get("output_queue", default_output_queue)
- subscriber = params.get("subscriber", default_subscriber)
- pr_request_queue = params.get(
- "prompt_request_queue", prompt_request_queue
- )
- pr_response_queue = params.get(
- "prompt_response_queue", prompt_response_queue
- )
+ id = params.get("id")
super(Processor, self).__init__(
**params | {
- "input_queue": input_queue,
- "output_queue": output_queue,
- "subscriber": subscriber,
- "input_schema": Chunk,
- "output_schema": Triples,
- "prompt_request_queue": pr_request_queue,
- "prompt_response_queue": pr_response_queue,
+ "id": id,
}
)
- self.prompt = PromptClient(
- pulsar_host=self.pulsar_host,
- pulsar_api_key=self.pulsar_api_key,
- input_queue=pr_request_queue,
- output_queue=pr_response_queue,
- subscriber = module + "-prompt",
+ self.register_specification(
+ ConsumerSpec(
+ name = "input",
+ schema = Chunk,
+ handler = self.on_message
+ )
+ )
+
+ self.register_specification(
+ PromptClientSpec(
+ request_name = "prompt-request",
+ response_name = "prompt-response",
+ )
+ )
+
+ self.register_specification(
+ ProducerSpec(
+ name = "triples",
+ schema = Triples
+ )
)
def to_uri(self, text):
@@ -68,28 +63,39 @@ class Processor(ConsumerProducer):
return uri
- def get_relationships(self, chunk):
-
- return self.prompt.request_relationships(chunk)
-
- async def emit_edges(self, metadata, triples):
+ async def emit_triples(self, pub, metadata, triples):
t = Triples(
metadata=metadata,
triples=triples,
)
- await self.send(t)
+ await pub.send(t)
- async def handle(self, msg):
+ async def on_message(self, msg, consumer, flow):
v = msg.value()
print(f"Indexing {v.metadata.id}...", flush=True)
chunk = v.chunk.decode("utf-8")
+ print(chunk, flush=True)
+
try:
- rels = self.get_relationships(chunk)
+ try:
+
+ rels = await flow("prompt-request").extract_relationships(
+ text = chunk
+ )
+
+ print("Response", rels, flush=True)
+
+ if type(rels) != list:
+ raise RuntimeError("Expecting array in prompt response")
+
+ except Exception as e:
+ print("Prompt exception:", e, flush=True)
+ raise e
triples = []
@@ -100,9 +106,9 @@ class Processor(ConsumerProducer):
for rel in rels:
- s = rel.s
- p = rel.p
- o = rel.o
+ s = rel["subject"]
+ p = rel["predicate"]
+ o = rel["object"]
if s == "": continue
if p == "": continue
@@ -118,7 +124,7 @@ class Processor(ConsumerProducer):
p_uri = self.to_uri(p)
p_value = Value(value=str(p_uri), is_uri=True)
- if rel.o_entity:
+ if rel["object-entity"]:
o_uri = self.to_uri(o)
o_value = Value(value=str(o_uri), is_uri=True)
else:
@@ -144,7 +150,7 @@ class Processor(ConsumerProducer):
o=Value(value=str(p), is_uri=False)
))
- if rel.o_entity:
+ if rel["object-entity"]:
# Label for o
triples.append(Triple(
s=o_value,
@@ -159,7 +165,7 @@ class Processor(ConsumerProducer):
o=Value(value=v.metadata.id, is_uri=True)
))
- if rel.o_entity:
+ if rel["object-entity"]:
# 'Subject of' for o
triples.append(Triple(
s=o_value,
@@ -167,7 +173,8 @@ class Processor(ConsumerProducer):
o=Value(value=v.metadata.id, is_uri=True)
))
- await self.emit_edges(
+ await self.emit_triples(
+ flow("triples"),
Metadata(
id=v.metadata.id,
metadata=[],
@@ -185,24 +192,9 @@ class Processor(ConsumerProducer):
@staticmethod
def add_args(parser):
- ConsumerProducer.add_args(
- parser, default_input_queue, default_subscriber,
- default_output_queue,
- )
-
- parser.add_argument(
- '--prompt-request-queue',
- default=prompt_request_queue,
- help=f'Prompt request queue (default: {prompt_request_queue})',
- )
-
- parser.add_argument(
- '--prompt-response-queue',
- default=prompt_response_queue,
- help=f'Prompt response queue (default: {prompt_response_queue})',
- )
+ FlowProcessor.add_args(parser)
def run():
- Processor.launch(module, __doc__)
+ Processor.launch(default_ident, __doc__)
diff --git a/trustgraph-flow/trustgraph/extract/kg/topics/extract.py b/trustgraph-flow/trustgraph/extract/kg/topics/extract.py
index 7424abe2..84ab6681 100755
--- a/trustgraph-flow/trustgraph/extract/kg/topics/extract.py
+++ b/trustgraph-flow/trustgraph/extract/kg/topics/extract.py
@@ -18,7 +18,7 @@ from .... base import ConsumerProducer
DEFINITION_VALUE = Value(value=DEFINITION, is_uri=True)
-module = ".".join(__name__.split(".")[1:-1])
+module = "kg-extract-topics"
default_input_queue = chunk_ingest_queue
default_output_queue = triples_store_queue
diff --git a/trustgraph-flow/trustgraph/gateway/config/receiver.py b/trustgraph-flow/trustgraph/gateway/config/receiver.py
new file mode 100755
index 00000000..63800a41
--- /dev/null
+++ b/trustgraph-flow/trustgraph/gateway/config/receiver.py
@@ -0,0 +1,121 @@
+"""
+Config receiver for the API gateway. Consumes configuration pushes from the
+Pulsar bus and starts or stops flows on the registered handlers.
+"""
+
+module = "api-gateway"
+
+# FIXME: Subscribes to Pulsar unnecessarily, should only do it when there
+# are active listeners
+
+# FIXME: Connection errors in publishers / subscribers cause those threads
+# to fail, and the failures are neither surfaced nor retried
+
+import asyncio
+import argparse
+from aiohttp import web
+import logging
+import os
+import base64
+import uuid
+import json
+
+import pulsar
+from prometheus_client import start_http_server
+
+from ... schema import ConfigPush, config_push_queue
+from ... base import Consumer
+
+logger = logging.getLogger("config.receiver")
+logger.setLevel(logging.INFO)
+
+class ConfigReceiver:
+ """
+ Tracks the flows named in pushed configuration and tells registered
+ handlers when a flow appears or disappears.
+ """
+
+ def __init__(self, pulsar_client):
+ """Keep the Pulsar client; start with no handlers and no known flows."""
+
+ self.pulsar_client = pulsar_client
+
+ # Objects whose start_flow / stop_flow coroutines are awaited on changes
+ self.flow_handlers = []
+
+ # Flow id -> flow definition decoded from the pushed JSON string
+ self.flows = {}
+
+ def add_handler(self, h):
+ """Register a handler to be told about flow starts and stops."""
+ self.flow_handlers.append(h)
+
+ async def on_config(self, msg, proc, flow):
+ """
+ Handle one pushed configuration message.
+
+ Keys under "flows" that are not yet known are JSON-decoded, stored
+ and started; known keys that are missing from the push are stopped
+ and removed. A key present on both sides is left untouched, so a
+ changed definition for an existing flow is not picked up here.
+ A config without a "flows" entry changes nothing. proc and flow are
+ not used. Any exception is printed rather than raised.
+ """
+
+ try:
+
+ v = msg.value()
+
+ print(f"Config version", v.version)
+
+ if "flows" in v.config:
+
+ flows = v.config["flows"]
+
+ # Snapshot both key sets as lists so self.flows can be
+ # modified while the loops below run
+ wanted = list(flows.keys())
+ current = list(self.flows.keys())
+
+ # Newly announced flows
+ for k in wanted:
+ if k not in current:
+ self.flows[k] = json.loads(flows[k])
+ await self.start_flow(k, self.flows[k])
+
+ # Flows that have disappeared from the configuration
+ for k in current:
+ if k not in wanted:
+ await self.stop_flow(k, self.flows[k])
+ del self.flows[k]
+
+ except Exception as e:
+ print(f"Exception: {e}", flush=True)
+
+ async def start_flow(self, id, flow):
+ """
+ Pass a new flow to each handler's start_flow; a handler that raises
+ has its exception printed.
+ """
+
+ print("Start flow", id)
+
+ for handler in self.flow_handlers:
+
+ try:
+ await handler.start_flow(id, flow)
+ except Exception as e:
+ print(f"Exception: {e}", flush=True)
+
+ async def stop_flow(self, id, flow):
+ """
+ Pass a removed flow to each handler's stop_flow; a handler that
+ raises has its exception printed.
+ """
+
+ print("Stop flow", id)
+
+ for handler in self.flow_handlers:
+
+ try:
+ await handler.stop_flow(id, flow)
+ except Exception as e:
+ print(f"Exception: {e}", flush=True)
+
+ async def config_loader(self):
+ """
+ Create and start the consumer on config_push_queue within a task
+ group, delivering messages to on_config.
+ """
+
+ async with asyncio.TaskGroup() as tg:
+
+ # Fresh UUID per call, used to build the subscriber name
+ id = str(uuid.uuid4())
+
+ # start_of_messages=True presumably makes the consumer read the
+ # topic from its beginning -- TODO confirm against Consumer
+ self.config_cons = Consumer(
+ taskgroup = tg,
+ flow = None,
+ client = self.pulsar_client,
+ subscriber = f"gateway-{id}",
+ topic = config_push_queue,
+ schema = ConfigPush,
+ handler = self.on_config,
+ start_of_messages = True,
+ )
+
+ await self.config_cons.start()
+
+ print("Waiting...")
+
+ print("Config consumer done. :/")
+
+ async def start(self):
+ """Schedule config_loader() as a background task and return."""
+
+ # NOTE(review): the Task returned by create_task is not kept. The
+ # asyncio documentation recommends holding a reference so the task
+ # cannot be garbage-collected before it finishes.
+ asyncio.create_task(self.config_loader())
+
diff --git a/trustgraph-flow/trustgraph/gateway/agent.py b/trustgraph-flow/trustgraph/gateway/dispatch/agent.py
similarity index 70%
rename from trustgraph-flow/trustgraph/gateway/agent.py
rename to trustgraph-flow/trustgraph/gateway/dispatch/agent.py
index 150b970e..d0fd8537 100644
--- a/trustgraph-flow/trustgraph/gateway/agent.py
+++ b/trustgraph-flow/trustgraph/gateway/dispatch/agent.py
@@ -1,20 +1,22 @@
-from .. schema import AgentRequest, AgentResponse
-from .. schema import agent_request_queue
-from .. schema import agent_response_queue
+from ... schema import AgentRequest, AgentResponse
-from . endpoint import ServiceEndpoint
from . requestor import ServiceRequestor
class AgentRequestor(ServiceRequestor):
- def __init__(self, pulsar_client, timeout, auth):
+ def __init__(
+ self, pulsar_client, request_queue, response_queue, timeout,
+ consumer, subscriber,
+ ):
super(AgentRequestor, self).__init__(
pulsar_client=pulsar_client,
- request_queue=agent_request_queue,
- response_queue=agent_response_queue,
+ request_queue=request_queue,
+ response_queue=response_queue,
request_schema=AgentRequest,
response_schema=AgentResponse,
+ subscription = subscriber,
+ consumer_name = consumer,
timeout=timeout,
)
@@ -39,4 +41,3 @@ class AgentRequestor(ServiceRequestor):
# The 2nd boolean expression indicates whether we're done responding
return resp, (message.answer is not None)
-
diff --git a/trustgraph-flow/trustgraph/gateway/dispatch/config.py b/trustgraph-flow/trustgraph/gateway/dispatch/config.py
new file mode 100644
index 00000000..3aeedb6f
--- /dev/null
+++ b/trustgraph-flow/trustgraph/gateway/dispatch/config.py
@@ -0,0 +1,78 @@
+
+from ... schema import ConfigRequest, ConfigResponse, ConfigKey, ConfigValue
+from ... schema import config_request_queue
+from ... schema import config_response_queue
+
+from . requestor import ServiceRequestor
+
+class ConfigRequestor(ServiceRequestor):
+ """
+ Requestor bound to the config request/response queues; converts between
+ dict request bodies and ConfigRequest / ConfigResponse messages.
+ """
+ def __init__(self, pulsar_client, consumer, subscriber, timeout=120):
+ """
+ Wire the base requestor to config_request_queue and
+ config_response_queue. consumer and subscriber are passed on as
+ consumer_name and subscription; timeout defaults to 120.
+ """
+
+ super(ConfigRequestor, self).__init__(
+ pulsar_client=pulsar_client,
+ consumer_name = consumer,
+ subscription = subscriber,
+ request_queue=config_request_queue,
+ response_queue=config_response_queue,
+ request_schema=ConfigRequest,
+ response_schema=ConfigResponse,
+ timeout=timeout,
+ )
+
+ def to_request(self, body):
+ """
+ Build a ConfigRequest from a dict body.
+
+ "keys" and "values" are optional lists of dicts and become None when
+ absent; each entry must carry "type" and "key" (plus "value" for
+ values) or a KeyError is raised. "operation" and "type" default to
+ None when missing.
+ """
+
+ if "keys" in body:
+ keys = [
+ ConfigKey(
+ type = k["type"],
+ key = k["key"],
+ )
+ for k in body["keys"]
+ ]
+ else:
+ keys = None
+
+ if "values" in body:
+ values = [
+ ConfigValue(
+ type = v["type"],
+ key = v["key"],
+ value = v["value"],
+ )
+ for v in body["values"]
+ ]
+ else:
+ values = None
+
+ return ConfigRequest(
+ operation = body.get("operation", None),
+ keys = keys,
+ type = body.get("type", None),
+ values = values
+ )
+
+ def from_response(self, message):
+ """
+ Convert a response message to a dict holding only the fields that
+ are not None (version, values, directory, config), and return it
+ together with True.
+ """
+
+ response = { }
+
+ if message.version is not None:
+ response["version"] = message.version
+
+ if message.values is not None:
+ response["values"] = [
+ {
+ "type": v.type,
+ "key": v.key,
+ "value": v.value,
+ }
+ for v in message.values
+ ]
+
+ if message.directory is not None:
+ response["directory"] = message.directory
+
+ if message.config is not None:
+ response["config"] = message.config
+
+ # NOTE(review): the second element is always True. In the agent
+ # requestor that flag marks the final response; presumably it means
+ # the same here -- confirm against ServiceRequestor.
+ return response, True
+
diff --git a/trustgraph-flow/trustgraph/gateway/dispatch/document_embeddings_export.py b/trustgraph-flow/trustgraph/gateway/dispatch/document_embeddings_export.py
new file mode 100644
index 00000000..2587132d
--- /dev/null
+++ b/trustgraph-flow/trustgraph/gateway/dispatch/document_embeddings_export.py
@@ -0,0 +1,67 @@
+
+import asyncio
+import queue
+import uuid
+
+from ... schema import DocumentEmbeddings
+from ... base import Subscriber
+
+from . serialize import serialize_document_embeddings
+
+class DocumentEmbeddingsExport:
+ """
+ Forwards DocumentEmbeddings messages from a Pulsar topic to a websocket
+ as JSON.
+ """
+
+ def __init__(
+ self, ws, running, pulsar_client, queue, consumer, subscriber
+ ):
+ """
+ Store the websocket, the running flag object (get() / stop() are
+ used on it), the Pulsar client, the topic to read (queue) and the
+ consumer / subscription names handed to the Subscriber in run().
+ """
+
+ self.ws = ws
+ self.running = running
+ self.pulsar_client = pulsar_client
+ self.queue = queue
+ self.consumer = consumer
+ self.subscriber = subscriber
+
+ async def destroy(self):
+ """Clear the running flag and close the websocket."""
+ self.running.stop()
+ await self.ws.close()
+
+ async def receive(self, msg):
+ """Accept and discard a message sent by the websocket peer."""
+ # Ignore incoming info from websocket
+ pass
+
+ async def run(self):
+ """
+ Subscribe to the topic and relay each message to the websocket
+ until the running flag clears or an unexpected error occurs, then
+ unsubscribe, stop the subscriber, close the websocket and clear the
+ running flag.
+ """
+
+ subs = Subscriber(
+ client = self.pulsar_client, topic = self.queue,
+ consumer_name = self.consumer, subscription = self.subscriber,
+ schema = DocumentEmbeddings
+ )
+
+ await subs.start()
+
+ # Fresh UUID identifying this subscription on the subscriber
+ id = str(uuid.uuid4())
+ q = await subs.subscribe_all(id)
+
+ while self.running.get():
+ try:
+
+ # Short timeout so the running flag is re-checked regularly
+ resp = await asyncio.wait_for(q.get(), timeout=0.5)
+ await self.ws.send_json(serialize_document_embeddings(resp))
+
+ # NOTE(review): wait_for raises the builtin TimeoutError only on
+ # Python 3.11+; on older versions asyncio.TimeoutError would
+ # fall through to the generic handler below and end the loop.
+ except TimeoutError:
+ continue
+
+ # `queue` here is the stdlib module imported at the top of the file
+ except queue.Empty:
+ continue
+
+ except Exception as e:
+ print(f"Exception: {str(e)}", flush=True)
+ break
+
+ await subs.unsubscribe_all(id)
+
+ await subs.stop()
+
+ await self.ws.close()
+ self.running.stop()
+
diff --git a/trustgraph-flow/trustgraph/gateway/dispatch/document_embeddings_import.py b/trustgraph-flow/trustgraph/gateway/dispatch/document_embeddings_import.py
new file mode 100644
index 00000000..1f459081
--- /dev/null
+++ b/trustgraph-flow/trustgraph/gateway/dispatch/document_embeddings_import.py
@@ -0,0 +1,64 @@
+
+import asyncio
+import uuid
+from aiohttp import WSMsgType
+
+from ... schema import Metadata
+from ... schema import DocumentEmbeddings, ChunkEmbeddings
+from ... base import Publisher
+
+from . serialize import to_subgraph
+
+class DocumentEmbeddingsImport:
+ """
+ Publishes DocumentEmbeddings messages built from JSON received on a
+ websocket.
+ """
+
+ def __init__(
+ self, ws, running, pulsar_client, queue
+ ):
+ """
+ Store the websocket and running flag object, and create a Publisher
+ for DocumentEmbeddings on the given topic (queue).
+ """
+
+ self.ws = ws
+ self.running = running
+
+ # NOTE(review): no publisher.start() call is visible in this class;
+ # confirm whether Publisher needs starting before send().
+ self.publisher = Publisher(
+ pulsar_client, topic = queue, schema = DocumentEmbeddings
+ )
+
+ async def destroy(self):
+ """
+ Clear the running flag, close the websocket if one is still held,
+ and stop the publisher.
+ """
+ self.running.stop()
+
+ if self.ws:
+ await self.ws.close()
+
+ await self.publisher.stop()
+
+ async def receive(self, msg):
+ """
+ Decode one websocket message as JSON and publish it.
+
+ The payload must contain "metadata" (with "id", "metadata", "user"
+ and "collection") and "chunks" (each with "chunk" text and
+ "vectors"); a missing key raises KeyError.
+ """
+
+ data = msg.json()
+
+ elt = DocumentEmbeddings(
+ metadata=Metadata(
+ id=data["metadata"]["id"],
+ metadata=to_subgraph(data["metadata"]["metadata"]),
+ user=data["metadata"]["user"],
+ collection=data["metadata"]["collection"],
+ ),
+ chunks=[
+ ChunkEmbeddings(
+ # Chunk text arrives as str and is stored as UTF-8 bytes
+ chunk=de["chunk"].encode("utf-8"),
+ vectors=de["vectors"],
+ )
+ for de in data["chunks"]
+ ],
+ )
+
+ # First argument is None -- presumably a message id/key that is not
+ # needed here; verify against Publisher.send
+ await self.publisher.send(None, elt)
+
+ async def run(self):
+ """
+ Poll the running flag every 0.5 seconds until it clears, then close
+ the websocket if still held and drop the reference to it.
+ """
+
+ while self.running.get():
+ await asyncio.sleep(0.5)
+
+ if self.ws:
+ await self.ws.close()
+
+ self.ws = None
+
diff --git a/trustgraph-flow/trustgraph/gateway/document_load.py b/trustgraph-flow/trustgraph/gateway/dispatch/document_load.py
similarity index 68%
rename from trustgraph-flow/trustgraph/gateway/document_load.py
rename to trustgraph-flow/trustgraph/gateway/dispatch/document_load.py
index 78cd7930..f92fc34f 100644
--- a/trustgraph-flow/trustgraph/gateway/document_load.py
+++ b/trustgraph-flow/trustgraph/gateway/dispatch/document_load.py
@@ -1,19 +1,18 @@
import base64
-from .. schema import Document, Metadata
-from .. schema import document_ingest_queue
+from ... schema import Document, Metadata
from . sender import ServiceSender
from . serialize import to_subgraph
-class DocumentLoadSender(ServiceSender):
- def __init__(self, pulsar_client):
+class DocumentLoad(ServiceSender):
+ def __init__(self, pulsar_client, queue):
- super(DocumentLoadSender, self).__init__(
- pulsar_client=pulsar_client,
- request_queue=document_ingest_queue,
- request_schema=Document,
+ super(DocumentLoad, self).__init__(
+ pulsar_client = pulsar_client,
+ queue = queue,
+ schema = Document,
)
def to_request(self, body):
diff --git a/trustgraph-flow/trustgraph/gateway/document_rag.py b/trustgraph-flow/trustgraph/gateway/dispatch/document_rag.py
similarity index 65%
rename from trustgraph-flow/trustgraph/gateway/document_rag.py
rename to trustgraph-flow/trustgraph/gateway/dispatch/document_rag.py
index 94d8f788..29194f97 100644
--- a/trustgraph-flow/trustgraph/gateway/document_rag.py
+++ b/trustgraph-flow/trustgraph/gateway/dispatch/document_rag.py
@@ -1,20 +1,22 @@
-from .. schema import DocumentRagQuery, DocumentRagResponse
-from .. schema import document_rag_request_queue
-from .. schema import document_rag_response_queue
+from ... schema import DocumentRagQuery, DocumentRagResponse
-from . endpoint import ServiceEndpoint
from . requestor import ServiceRequestor
class DocumentRagRequestor(ServiceRequestor):
- def __init__(self, pulsar_client, timeout, auth):
+ def __init__(
+ self, pulsar_client, request_queue, response_queue, timeout,
+ consumer, subscriber,
+ ):
super(DocumentRagRequestor, self).__init__(
pulsar_client=pulsar_client,
- request_queue=document_rag_request_queue,
- response_queue=document_rag_response_queue,
+ request_queue=request_queue,
+ response_queue=response_queue,
request_schema=DocumentRagQuery,
response_schema=DocumentRagResponse,
+ subscription = subscriber,
+ consumer_name = consumer,
timeout=timeout,
)
diff --git a/trustgraph-flow/trustgraph/gateway/embeddings.py b/trustgraph-flow/trustgraph/gateway/dispatch/embeddings.py
similarity index 59%
rename from trustgraph-flow/trustgraph/gateway/embeddings.py
rename to trustgraph-flow/trustgraph/gateway/dispatch/embeddings.py
index 42ed91a1..4549942e 100644
--- a/trustgraph-flow/trustgraph/gateway/embeddings.py
+++ b/trustgraph-flow/trustgraph/gateway/dispatch/embeddings.py
@@ -1,20 +1,22 @@
-from .. schema import EmbeddingsRequest, EmbeddingsResponse
-from .. schema import embeddings_request_queue
-from .. schema import embeddings_response_queue
+from ... schema import EmbeddingsRequest, EmbeddingsResponse
-from . endpoint import ServiceEndpoint
from . requestor import ServiceRequestor
class EmbeddingsRequestor(ServiceRequestor):
- def __init__(self, pulsar_client, timeout, auth):
+ def __init__(
+ self, pulsar_client, request_queue, response_queue, timeout,
+ consumer, subscriber,
+ ):
super(EmbeddingsRequestor, self).__init__(
pulsar_client=pulsar_client,
- request_queue=embeddings_request_queue,
- response_queue=embeddings_response_queue,
+ request_queue=request_queue,
+ response_queue=response_queue,
request_schema=EmbeddingsRequest,
response_schema=EmbeddingsResponse,
+ subscription = subscriber,
+ consumer_name = consumer,
timeout=timeout,
)
diff --git a/trustgraph-flow/trustgraph/gateway/dispatch/flow.py b/trustgraph-flow/trustgraph/gateway/dispatch/flow.py
new file mode 100644
index 00000000..0b38e9be
--- /dev/null
+++ b/trustgraph-flow/trustgraph/gateway/dispatch/flow.py
@@ -0,0 +1,52 @@
+
+from ... schema import FlowRequest, FlowResponse
+from ... schema import flow_request_queue
+from ... schema import flow_response_queue
+
+from . requestor import ServiceRequestor
+
+class FlowRequestor(ServiceRequestor):
+    """Request/response dispatcher for the flow service.
+
+    Translates JSON-style request bodies into ``FlowRequest`` objects and
+    ``FlowResponse`` messages back into plain dictionaries.
+    """
+
+    def __init__(self, pulsar_client, consumer, subscriber, timeout=120):
+        """Bind the requestor to the flow request/response queues.
+
+        consumer and subscriber are passed to the base class as the
+        consumer name and subscription name respectively.
+        """
+
+        super().__init__(
+            pulsar_client=pulsar_client,
+            consumer_name=consumer,
+            subscription=subscriber,
+            request_queue=flow_request_queue,
+            response_queue=flow_response_queue,
+            request_schema=FlowRequest,
+            response_schema=FlowResponse,
+            timeout=timeout,
+        )
+
+    def to_request(self, body):
+        """Build a FlowRequest from a dict; absent keys become None."""
+
+        lookup = body.get
+
+        return FlowRequest(
+            operation=lookup("operation"),
+            class_name=lookup("class-name"),
+            class_definition=lookup("class-definition"),
+            description=lookup("description"),
+            flow_id=lookup("flow-id"),
+        )
+
+    def from_response(self, message):
+        """Return ``(dict, True)`` holding every non-None response field.
+
+        The second element is always True: each flow response is treated
+        as the final one for its request.
+        """
+
+        # (message attribute, key used in the outgoing dictionary)
+        field_map = (
+            ("class_names", "class-names"),
+            ("flow_ids", "flow-ids"),
+            ("class_definition", "class-definition"),
+            ("flow", "flow"),
+            ("description", "description"),
+        )
+
+        response = {}
+
+        for attr, key in field_map:
+            value = getattr(message, attr)
+            if value is not None:
+                response[key] = value
+
+        return response, True
+
diff --git a/trustgraph-flow/trustgraph/gateway/dispatch/graph_embeddings_export.py b/trustgraph-flow/trustgraph/gateway/dispatch/graph_embeddings_export.py
new file mode 100644
index 00000000..07f72550
--- /dev/null
+++ b/trustgraph-flow/trustgraph/gateway/dispatch/graph_embeddings_export.py
@@ -0,0 +1,67 @@
+
+import asyncio
+import queue
+import uuid
+
+from ... schema import GraphEmbeddings
+from ... base import Subscriber
+
+from . serialize import serialize_graph_embeddings
+
+class GraphEmbeddingsExport:
+    """Websocket export handler streaming graph embeddings from a queue.
+
+    ``run`` subscribes to the configured topic and forwards every message,
+    serialized with ``serialize_graph_embeddings``, to the websocket until
+    the run flag clears or an error occurs.
+    """
+
+    def __init__(
+            self, ws, running, pulsar_client, queue, consumer, subscriber
+    ):
+        """Record the socket, run flag and subscription settings.
+
+        ws -- websocket the embeddings are written to
+        running -- run flag exposing ``get()`` and ``stop()``
+        pulsar_client -- client handed to the Subscriber
+        queue -- topic to read embeddings from
+        consumer -- consumer name for the subscription
+        subscriber -- subscription name
+        """
+
+        self.ws = ws
+        self.running = running
+        self.pulsar_client = pulsar_client
+        self.queue = queue
+        self.consumer = consumer
+        self.subscriber = subscriber
+
+    async def destroy(self):
+        """Clear the run flag and close the websocket."""
+        self.running.stop()
+        await self.ws.close()
+
+    async def receive(self, msg):
+        """Ignore incoming websocket messages; this is an export-only socket."""
+        pass
+
+    async def run(self):
+        """Forward queue messages to the websocket until stopped.
+
+        The subscription, the subscriber and the websocket are released in
+        ``finally`` blocks so they are cleaned up even when this task is
+        cancelled, not only when the loop ends normally.
+        """
+
+        subs = Subscriber(
+            client = self.pulsar_client, topic = self.queue,
+            consumer_name = self.consumer, subscription = self.subscriber,
+            schema = GraphEmbeddings
+        )
+
+        await subs.start()
+
+        try:
+
+            sub_id = str(uuid.uuid4())
+            q = await subs.subscribe_all(sub_id)
+
+            try:
+
+                while self.running.get():
+                    try:
+
+                        # Short timeout so the run flag is polled regularly
+                        resp = await asyncio.wait_for(q.get(), timeout=0.5)
+                        await self.ws.send_json(
+                            serialize_graph_embeddings(resp)
+                        )
+
+                    # asyncio.TimeoutError only became an alias of the
+                    # builtin TimeoutError in Python 3.11; naming the
+                    # asyncio one works on every version.
+                    except asyncio.TimeoutError:
+                        continue
+
+                    except queue.Empty:
+                        continue
+
+                    except Exception as e:
+                        print(f"Exception: {str(e)}", flush=True)
+                        break
+
+            finally:
+                await subs.unsubscribe_all(sub_id)
+
+        finally:
+
+            await subs.stop()
+
+            await self.ws.close()
+            self.running.stop()
+
diff --git a/trustgraph-flow/trustgraph/gateway/dispatch/graph_embeddings_import.py b/trustgraph-flow/trustgraph/gateway/dispatch/graph_embeddings_import.py
new file mode 100644
index 00000000..70e78c87
--- /dev/null
+++ b/trustgraph-flow/trustgraph/gateway/dispatch/graph_embeddings_import.py
@@ -0,0 +1,64 @@
+
+import asyncio
+import uuid
+from aiohttp import WSMsgType
+
+from ... schema import Metadata
+from ... schema import GraphEmbeddings, EntityEmbeddings
+from ... base import Publisher
+
+from . serialize import to_subgraph, to_value
+
+class GraphEmbeddingsImport:
+    """Websocket import handler publishing graph embeddings to a queue.
+
+    Each websocket message passed to ``receive`` is decoded from JSON,
+    converted to a ``GraphEmbeddings`` record and sent through the
+    publisher created in the constructor.
+
+    NOTE(review): nothing in this class calls ``publisher.start()``;
+    presumably the caller or the Publisher itself takes care of that --
+    confirm.
+    """
+
+    def __init__(
+            self, ws, running, pulsar_client, queue
+    ):
+        """Keep the socket and run flag, and build the publisher.
+
+        ws -- websocket this handler serves
+        running -- run flag exposing ``get()`` and ``stop()``
+        pulsar_client -- client handed to the Publisher
+        queue -- topic the embeddings are published to
+        """
+
+        self.ws = ws
+        self.running = running
+
+        self.publisher = Publisher(
+            pulsar_client, topic = queue, schema = GraphEmbeddings
+        )
+
+    async def destroy(self):
+        """Clear the run flag, close the socket (if any), stop publishing."""
+
+        self.running.stop()
+
+        socket = self.ws
+        if socket:
+            await socket.close()
+
+        await self.publisher.stop()
+
+    async def receive(self, msg):
+        """Convert one websocket message and publish it.
+
+        The JSON payload must carry ``metadata`` (``id``, ``metadata``,
+        ``user``, ``collection``) and ``entities`` (each with ``entity``
+        and ``vectors``); a missing key raises KeyError.
+        """
+
+        payload = msg.json()
+        md = payload["metadata"]
+
+        metadata = Metadata(
+            id=md["id"],
+            metadata=to_subgraph(md["metadata"]),
+            user=md["user"],
+            collection=md["collection"],
+        )
+
+        entities = []
+        for item in payload["entities"]:
+            entities.append(
+                EntityEmbeddings(
+                    entity=to_value(item["entity"]),
+                    vectors=item["vectors"],
+                )
+            )
+
+        record = GraphEmbeddings(metadata=metadata, entities=entities)
+
+        await self.publisher.send(None, record)
+
+    async def run(self):
+        """Idle until the run flag clears, then close and drop the socket."""
+
+        # All the work happens in receive(); this just polls the flag
+        while True:
+            if not self.running.get():
+                break
+            await asyncio.sleep(0.5)
+
+        socket = self.ws
+        if socket:
+            await socket.close()
+
+        self.ws = None
+
diff --git a/trustgraph-flow/trustgraph/gateway/graph_embeddings_query.py b/trustgraph-flow/trustgraph/gateway/dispatch/graph_embeddings_query.py
similarity index 69%
rename from trustgraph-flow/trustgraph/gateway/graph_embeddings_query.py
rename to trustgraph-flow/trustgraph/gateway/dispatch/graph_embeddings_query.py
index 8df38e97..27ceb702 100644
--- a/trustgraph-flow/trustgraph/gateway/graph_embeddings_query.py
+++ b/trustgraph-flow/trustgraph/gateway/dispatch/graph_embeddings_query.py
@@ -1,21 +1,23 @@
-from .. schema import GraphEmbeddingsRequest, GraphEmbeddingsResponse
-from .. schema import graph_embeddings_request_queue
-from .. schema import graph_embeddings_response_queue
+from ... schema import GraphEmbeddingsRequest, GraphEmbeddingsResponse
-from . endpoint import ServiceEndpoint
from . requestor import ServiceRequestor
from . serialize import serialize_value
class GraphEmbeddingsQueryRequestor(ServiceRequestor):
- def __init__(self, pulsar_client, timeout, auth):
+ def __init__(
+ self, pulsar_client, request_queue, response_queue, timeout,
+ consumer, subscriber,
+ ):
super(GraphEmbeddingsQueryRequestor, self).__init__(
pulsar_client=pulsar_client,
- request_queue=graph_embeddings_request_queue,
- response_queue=graph_embeddings_response_queue,
+ request_queue=request_queue,
+ response_queue=response_queue,
request_schema=GraphEmbeddingsRequest,
response_schema=GraphEmbeddingsResponse,
+ subscription = subscriber,
+ consumer_name = consumer,
timeout=timeout,
)
diff --git a/trustgraph-flow/trustgraph/gateway/graph_rag.py b/trustgraph-flow/trustgraph/gateway/dispatch/graph_rag.py
similarity index 71%
rename from trustgraph-flow/trustgraph/gateway/graph_rag.py
rename to trustgraph-flow/trustgraph/gateway/dispatch/graph_rag.py
index b2b69758..a31795b9 100644
--- a/trustgraph-flow/trustgraph/gateway/graph_rag.py
+++ b/trustgraph-flow/trustgraph/gateway/dispatch/graph_rag.py
@@ -1,20 +1,22 @@
-from .. schema import GraphRagQuery, GraphRagResponse
-from .. schema import graph_rag_request_queue
-from .. schema import graph_rag_response_queue
+from ... schema import GraphRagQuery, GraphRagResponse
-from . endpoint import ServiceEndpoint
from . requestor import ServiceRequestor
class GraphRagRequestor(ServiceRequestor):
- def __init__(self, pulsar_client, timeout, auth):
+ def __init__(
+ self, pulsar_client, request_queue, response_queue, timeout,
+ consumer, subscriber,
+ ):
super(GraphRagRequestor, self).__init__(
pulsar_client=pulsar_client,
- request_queue=graph_rag_request_queue,
- response_queue=graph_rag_response_queue,
+ request_queue=request_queue,
+ response_queue=response_queue,
request_schema=GraphRagQuery,
response_schema=GraphRagResponse,
+ subscription = subscriber,
+ consumer_name = consumer,
timeout=timeout,
)
diff --git a/trustgraph-flow/trustgraph/gateway/dispatch/knowledge.py b/trustgraph-flow/trustgraph/gateway/dispatch/knowledge.py
new file mode 100644
index 00000000..a35ee4f0
--- /dev/null
+++ b/trustgraph-flow/trustgraph/gateway/dispatch/knowledge.py
@@ -0,0 +1,97 @@
+
+import base64
+
+from ... schema import KnowledgeRequest, KnowledgeResponse, Triples
+from ... schema import GraphEmbeddings, Metadata, EntityEmbeddings
+from ... schema import knowledge_request_queue
+from ... schema import knowledge_response_queue
+
+from . requestor import ServiceRequestor
+from . serialize import serialize_graph_embeddings
+from . serialize import serialize_triples, to_subgraph, to_value
+from . serialize import to_document_metadata, to_processing_metadata
+
+class KnowledgeRequestor(ServiceRequestor):
+    """Request/response dispatcher for the knowledge service.
+
+    Builds ``KnowledgeRequest`` objects from JSON-style bodies (optionally
+    carrying triples and graph embeddings) and turns ``KnowledgeResponse``
+    messages into dictionaries.
+    """
+
+    def __init__(self, pulsar_client, consumer, subscriber, timeout=120):
+        """Bind the requestor to the knowledge request/response queues."""
+
+        super().__init__(
+            pulsar_client=pulsar_client,
+            consumer_name=consumer,
+            subscription=subscriber,
+            request_queue=knowledge_request_queue,
+            response_queue=knowledge_response_queue,
+            request_schema=KnowledgeRequest,
+            response_schema=KnowledgeResponse,
+            timeout=timeout,
+        )
+
+    def to_request(self, body):
+        """Build a KnowledgeRequest from a dict.
+
+        ``triples`` and ``graph-embeddings`` are optional sections; when
+        present their ``metadata`` must hold ``id``, ``metadata`` and
+        ``user`` (KeyError otherwise).  Scalar fields default to None.
+        """
+
+        triples = None
+
+        if "triples" in body:
+            section = body["triples"]
+            triples = Triples(
+                metadata=Metadata(
+                    id=section["metadata"]["id"],
+                    metadata=to_subgraph(section["metadata"]["metadata"]),
+                    user=section["metadata"]["user"],
+                ),
+                triples=to_subgraph(section["triples"]),
+            )
+
+        ge = None
+
+        if "graph-embeddings" in body:
+            section = body["graph-embeddings"]
+            ge = GraphEmbeddings(
+                metadata=Metadata(
+                    id=section["metadata"]["id"],
+                    metadata=to_subgraph(section["metadata"]["metadata"]),
+                    user=section["metadata"]["user"],
+                ),
+                entities=[
+                    EntityEmbeddings(
+                        entity=to_value(item["entity"]),
+                        vectors=item["vectors"],
+                    )
+                    for item in section["entities"]
+                ],
+            )
+
+        lookup = body.get
+
+        return KnowledgeRequest(
+            operation=lookup("operation"),
+            user=lookup("user"),
+            id=lookup("id"),
+            flow=lookup("flow"),
+            collection=lookup("collection"),
+            triples=triples,
+            graph_embeddings=ge,
+        )
+
+    def from_response(self, message):
+        """Map a response to ``(dict, final)``.
+
+        Triples and graph-embeddings payloads are reported with
+        ``final=False`` (more messages follow); an id listing, the
+        end-of-stream marker and the empty response are final.
+        """
+
+        # Response to a list operation
+        if message.ids is not None:
+            listing = {"ids": message.ids}
+            return listing, True
+
+        if message.triples:
+            part = {"triples": serialize_triples(message.triples)}
+            return part, False
+
+        if message.graph_embeddings:
+            part = {
+                "graph-embeddings": serialize_graph_embeddings(
+                    message.graph_embeddings
+                )
+            }
+            return part, False
+
+        if message.eos is True:
+            return {"eos": True}, True
+
+        # Empty case, returned from a successful delete.
+        return {}, True
+
diff --git a/trustgraph-flow/trustgraph/gateway/dispatch/librarian.py b/trustgraph-flow/trustgraph/gateway/dispatch/librarian.py
new file mode 100644
index 00000000..d33138ac
--- /dev/null
+++ b/trustgraph-flow/trustgraph/gateway/dispatch/librarian.py
@@ -0,0 +1,93 @@
+
+import base64
+
+from ... schema import LibrarianRequest, LibrarianResponse
+from ... schema import librarian_request_queue
+from ... schema import librarian_response_queue
+
+from . requestor import ServiceRequestor
+from . serialize import serialize_document_metadata
+from . serialize import serialize_processing_metadata
+from . serialize import to_document_metadata, to_processing_metadata
+from . serialize import to_criteria
+
+class LibrarianRequestor(ServiceRequestor):
+    """Request/response dispatcher for the librarian service.
+
+    Builds ``LibrarianRequest`` objects from JSON-style bodies and turns
+    ``LibrarianResponse`` messages into dictionaries.
+    """
+
+    def __init__(self, pulsar_client, consumer, subscriber, timeout=120):
+        """Bind the requestor to the librarian request/response queues."""
+
+        super(LibrarianRequestor, self).__init__(
+            pulsar_client=pulsar_client,
+            consumer_name = consumer,
+            subscription = subscriber,
+            request_queue=librarian_request_queue,
+            response_queue=librarian_response_queue,
+            request_schema=LibrarianRequest,
+            response_schema=LibrarianResponse,
+            timeout=timeout,
+        )
+
+    def to_request(self, body):
+        """Build a LibrarianRequest from a dict; absent fields become None.
+
+        Raises whatever ``base64.b64decode`` raises when ``content`` is
+        not decodable base64.
+        """
+
+        if "document-metadata" in body:
+            dm = to_document_metadata(body["document-metadata"])
+        else:
+            dm = None
+
+        if "processing-metadata" in body:
+            pm = to_processing_metadata(body["processing-metadata"])
+        else:
+            pm = None
+
+        if "criteria" in body:
+            criteria = to_criteria(body["criteria"])
+        else:
+            criteria = None
+
+        if "content" in body:
+            # Content gets base64 decoded & encoded again.  It at least
+            # makes sure the payload is decodable base64 before it is
+            # forwarded.
+            content = base64.b64decode(body["content"].encode("utf-8"))
+            content = base64.b64encode(content).decode("utf-8")
+        else:
+            content = None
+
+        return LibrarianRequest(
+            operation = body.get("operation", None),
+            document_id = body.get("document-id", None),
+            processing_id = body.get("processing-id", None),
+            document_metadata = dm,
+            processing_metadata = pm,
+            content = content,
+            user = body.get("user", None),
+            collection = body.get("collection", None),
+            criteria = criteria,
+        )
+
+    def from_response(self, message):
+        """Return ``(dict, True)`` holding the populated response fields.
+
+        Single-valued fields are included when truthy; the two list
+        fields are included whenever they are not None, so an empty
+        listing is still reported as an empty list.
+        """
+
+        response = {}
+
+        if message.document_metadata:
+            response["document-metadata"] = serialize_document_metadata(
+                message.document_metadata
+            )
+
+        if message.content:
+            response["content"] = message.content.decode("utf-8")
+
+        if message.document_metadatas is not None:
+            response["document-metadatas"] = [
+                serialize_document_metadata(v)
+                for v in message.document_metadatas
+            ]
+
+        if message.processing_metadatas is not None:
+            response["processing-metadatas"] = [
+                serialize_processing_metadata(v)
+                for v in message.processing_metadatas
+            ]
+
+        return response, True
+
diff --git a/trustgraph-flow/trustgraph/gateway/dispatch/manager.py b/trustgraph-flow/trustgraph/gateway/dispatch/manager.py
new file mode 100644
index 00000000..7896d588
--- /dev/null
+++ b/trustgraph-flow/trustgraph/gateway/dispatch/manager.py
@@ -0,0 +1,250 @@
+
+import asyncio
+import uuid
+
+from . config import ConfigRequestor
+from . flow import FlowRequestor
+from . librarian import LibrarianRequestor
+from . knowledge import KnowledgeRequestor
+
+from . embeddings import EmbeddingsRequestor
+from . agent import AgentRequestor
+from . text_completion import TextCompletionRequestor
+from . prompt import PromptRequestor
+from . graph_rag import GraphRagRequestor
+from . document_rag import DocumentRagRequestor
+from . triples_query import TriplesQueryRequestor
+from . embeddings import EmbeddingsRequestor
+from . graph_embeddings_query import GraphEmbeddingsQueryRequestor
+from . prompt import PromptRequestor
+from . text_load import TextLoad
+from . document_load import DocumentLoad
+
+from . triples_export import TriplesExport
+from . graph_embeddings_export import GraphEmbeddingsExport
+from . document_embeddings_export import DocumentEmbeddingsExport
+
+from . triples_import import TriplesImport
+from . graph_embeddings_import import GraphEmbeddingsImport
+from . document_embeddings_import import DocumentEmbeddingsImport
+
+from . mux import Mux
+
+# Per-flow request/response services.  DispatcherManager.invoke_flow_service
+# constructs these with the flow's "request" and "response" queues.
+request_response_dispatchers = {
+ "agent": AgentRequestor,
+ "text-completion": TextCompletionRequestor,
+ "prompt": PromptRequestor,
+ "graph-rag": GraphRagRequestor,
+ "document-rag": DocumentRagRequestor,
+ "embeddings": EmbeddingsRequestor,
+ "graph-embeddings": GraphEmbeddingsQueryRequestor,
+ "triples": TriplesQueryRequestor,
+}
+
+# Services that are not tied to a flow.  Used by
+# DispatcherManager.invoke_global_service, which passes no queue names.
+global_dispatchers = {
+ "config": ConfigRequestor,
+ "flow": FlowRequestor,
+ "librarian": LibrarianRequestor,
+ "knowledge": KnowledgeRequestor,
+}
+
+# Per-flow send-only services.  DispatcherManager.invoke_flow_service
+# constructs these with a single queue.
+sender_dispatchers = {
+ "text-load": TextLoad,
+ "document-load": DocumentLoad,
+}
+
+# Websocket export handlers, selected by kind in
+# DispatcherManager.process_flow_export.
+export_dispatchers = {
+ "triples": TriplesExport,
+ "graph-embeddings": GraphEmbeddingsExport,
+ "document-embeddings": DocumentEmbeddingsExport,
+}
+
+# Websocket import handlers, selected by kind in
+# DispatcherManager.process_flow_import.
+import_dispatchers = {
+ "triples": TriplesImport,
+ "graph-embeddings": GraphEmbeddingsImport,
+ "document-embeddings": DocumentEmbeddingsImport,
+}
+
+class DispatcherWrapper:
+    """Expose a coroutine function through a ``process`` method."""
+
+    def __init__(self, handler):
+        """Remember the coroutine function that ``process`` delegates to."""
+        self.handler = handler
+
+    async def process(self, *call_args):
+        """Await the wrapped handler with the given arguments; return its result."""
+        outcome = await self.handler(*call_args)
+        return outcome
+
+class DispatcherManager:
+    """Creates, caches and hands out gateway dispatchers.
+
+    Flow definitions arrive through ``start_flow`` / ``stop_flow`` (the
+    manager registers itself as a handler on the config receiver).
+    Request-style dispatchers are created lazily on first use and cached
+    under ``(flow id or None, kind)``; websocket import/export dispatchers
+    are created per connection and never cached.
+    """
+
+    def __init__(self, pulsar_client, config_receiver):
+        """Store collaborators and register for flow notifications."""
+        self.pulsar_client = pulsar_client
+        self.config_receiver = config_receiver
+        self.config_receiver.add_handler(self)
+
+        # Flow id -> flow definition, maintained by start_flow/stop_flow
+        self.flows = {}
+
+        # (flow id, kind) -> started dispatcher; flow id is None for
+        # global services
+        self.dispatchers = {}
+
+    async def start_flow(self, id, flow):
+        """Record the definition of a flow that has become available."""
+        print("Start flow", id)
+        self.flows[id] = flow
+        return
+
+    async def stop_flow(self, id, flow):
+        """Forget a flow and shut down the dispatchers cached for it.
+
+        Without this the cached dispatchers keep running against the old
+        flow's queues, and would be reused if a flow with the same id
+        were started again.  Unknown ids are tolerated.
+        """
+        print("Stop flow", id)
+        self.flows.pop(id, None)
+
+        stale = [key for key in self.dispatchers if key[0] == id]
+
+        for key in stale:
+            dispatcher = self.dispatchers.pop(key)
+            try:
+                await dispatcher.stop()
+            except Exception as e:
+                # Best effort: one failing dispatcher must not prevent
+                # the others from being stopped.
+                print(f"Exception stopping dispatcher {key}: {e}", flush=True)
+
+        return
+
+    def dispatch_global_service(self):
+        """Return an object whose ``process`` serves global-service requests."""
+        return DispatcherWrapper(self.process_global_service)
+
+    async def process_global_service(self, data, responder, params):
+        """Serve a global-service request; the kind is taken from ``params``."""
+
+        kind = params.get("kind")
+        return await self.invoke_global_service(data, responder, kind)
+
+    async def invoke_global_service(self, data, responder, kind):
+        """Run ``data`` through the global dispatcher for ``kind``.
+
+        The dispatcher is created and started on first use, then cached.
+        Raises RuntimeError("Invalid kind") for an unknown kind.
+        """
+
+        if kind not in global_dispatchers:
+            raise RuntimeError("Invalid kind")
+
+        key = (None, kind)
+
+        if key in self.dispatchers:
+            return await self.dispatchers[key].process(data, responder)
+
+        # NOTE(review): two concurrent first requests for the same kind can
+        # both reach this point and each create a dispatcher; the later one
+        # replaces the earlier in the cache.  Confirm whether that matters.
+        dispatcher = global_dispatchers[kind](
+            pulsar_client = self.pulsar_client,
+            timeout = 120,
+            consumer = f"api-gateway-{kind}-request",
+            subscriber = f"api-gateway-{kind}-request",
+        )
+
+        await dispatcher.start()
+
+        self.dispatchers[key] = dispatcher
+
+        return await dispatcher.process(data, responder)
+
+    def dispatch_flow_import(self):
+        """Return the factory coroutine for websocket import dispatchers."""
+        return self.process_flow_import
+
+    def dispatch_flow_export(self):
+        """Return the factory coroutine for websocket export dispatchers."""
+        return self.process_flow_export
+
+    def dispatch_socket(self):
+        """Return the factory coroutine for multiplexed-socket dispatchers."""
+        return self.process_socket
+
+    def dispatch_flow_service(self):
+        """Return an object whose ``process`` serves per-flow requests."""
+        return DispatcherWrapper(self.process_flow_service)
+
+    def _store_queue(self, flow, kind):
+        """Return the queue configured for the ``<kind>-store`` interface.
+
+        Raises RuntimeError when the flow does not define that interface.
+        """
+
+        intf_defs = self.flows[flow]["interfaces"]
+
+        # FIXME: The -store bit, does it make sense?
+        intf = kind + "-store"
+
+        # Check the key that is actually looked up, not the bare kind
+        if intf not in intf_defs:
+            raise RuntimeError("This kind not supported by flow")
+
+        return intf_defs[intf]
+
+    async def process_flow_import(self, ws, running, params):
+        """Create an import dispatcher for the flow/kind named in ``params``.
+
+        Raises RuntimeError for an unknown flow, an unknown kind, or a
+        flow without the matching store interface.
+        """
+
+        flow = params.get("flow")
+        kind = params.get("kind")
+
+        if flow not in self.flows:
+            raise RuntimeError("Invalid flow")
+
+        if kind not in import_dispatchers:
+            raise RuntimeError("Invalid kind")
+
+        qconfig = self._store_queue(flow, kind)
+
+        dispatcher = import_dispatchers[kind](
+            pulsar_client = self.pulsar_client,
+            ws = ws,
+            running = running,
+            queue = qconfig,
+        )
+
+        return dispatcher
+
+    async def process_flow_export(self, ws, running, params):
+        """Create an export dispatcher for the flow/kind named in ``params``.
+
+        Each dispatcher gets a fresh consumer/subscription name built
+        from a new UUID.  Raises RuntimeError for an unknown flow, an
+        unknown kind, or a flow without the matching store interface.
+        """
+
+        flow = params.get("flow")
+        kind = params.get("kind")
+
+        if flow not in self.flows:
+            raise RuntimeError("Invalid flow")
+
+        if kind not in export_dispatchers:
+            raise RuntimeError("Invalid kind")
+
+        qconfig = self._store_queue(flow, kind)
+
+        id = str(uuid.uuid4())
+        dispatcher = export_dispatchers[kind](
+            pulsar_client = self.pulsar_client,
+            ws = ws,
+            running = running,
+            queue = qconfig,
+            consumer = f"api-gateway-{id}",
+            subscriber = f"api-gateway-{id}",
+        )
+
+        return dispatcher
+
+    async def process_socket(self, ws, running, params):
+        """Create a Mux dispatcher for a multiplexed websocket."""
+
+        dispatcher = Mux(self, ws, running)
+
+        return dispatcher
+
+    async def process_flow_service(self, data, responder, params):
+        """Serve a per-flow request; flow and kind are taken from ``params``."""
+
+        flow = params.get("flow")
+        kind = params.get("kind")
+
+        return await self.invoke_flow_service(data, responder, flow, kind)
+
+    async def invoke_flow_service(self, data, responder, flow, kind):
+        """Run ``data`` through the dispatcher for ``kind`` on ``flow``.
+
+        The dispatcher is created and started on first use, then cached
+        under ``(flow, kind)``.  Raises RuntimeError for an unknown flow,
+        a kind the flow has no interface for, or a kind with no
+        dispatcher class.
+        """
+
+        if flow not in self.flows:
+            raise RuntimeError("Invalid flow")
+
+        key = (flow, kind)
+
+        if key in self.dispatchers:
+            return await self.dispatchers[key].process(data, responder)
+
+        intf_defs = self.flows[flow]["interfaces"]
+
+        if kind not in intf_defs:
+            raise RuntimeError("This kind not supported by flow")
+
+        qconfig = intf_defs[kind]
+
+        # NOTE(review): as in invoke_global_service, concurrent first
+        # requests for the same key can each create a dispatcher.
+        if kind in request_response_dispatchers:
+            dispatcher = request_response_dispatchers[kind](
+                pulsar_client = self.pulsar_client,
+                request_queue = qconfig["request"],
+                response_queue = qconfig["response"],
+                timeout = 120,
+                consumer = f"api-gateway-{flow}-{kind}-request",
+                subscriber = f"api-gateway-{flow}-{kind}-request",
+            )
+        elif kind in sender_dispatchers:
+            dispatcher = sender_dispatchers[kind](
+                pulsar_client = self.pulsar_client,
+                queue = qconfig,
+            )
+        else:
+            raise RuntimeError("Invalid kind")
+
+        await dispatcher.start()
+
+        self.dispatchers[key] = dispatcher
+
+        return await dispatcher.process(data, responder)
+
diff --git a/trustgraph-flow/trustgraph/gateway/dispatch/mux.py b/trustgraph-flow/trustgraph/gateway/dispatch/mux.py
new file mode 100644
index 00000000..e2c5a921
--- /dev/null
+++ b/trustgraph-flow/trustgraph/gateway/dispatch/mux.py
@@ -0,0 +1,167 @@
+
+import asyncio
+import queue
+import uuid
+
+# Limit used by Mux.start_request_task to throttle how many request tasks
+# are in flight on one socket
+MAX_OUTSTANDING_REQUESTS = 15
+# Seconds Mux.maybe_tidy_workers waits on the oldest worker before deciding
+# it is still running
+WORKER_CLOSE_WAIT = 0.01
+# Seconds Mux.start_request_task sleeps between checks while throttled
+START_REQUEST_WAIT = 0.1
+
+# Size of the queue between Mux.receive and Mux.run.  It only buffers
+# requests until their task is started, so entries are short-lived.
+MAX_QUEUE_SIZE = 10
+
+class Mux:
+    """Multiplexes many service requests over a single websocket.
+
+    ``receive`` validates incoming messages and queues them; ``run``
+    takes them off the queue and starts one task per request, each of
+    which calls into the dispatcher manager and writes its responses,
+    tagged with the request id, back to the socket.
+    """
+
+    def __init__(self, dispatcher_manager, ws, running):
+        """Store collaborators and create the bounded request queue.
+
+        dispatcher_manager -- object providing ``invoke_flow_service``
+            and ``invoke_global_service``
+        ws -- websocket being served
+        running -- run flag exposing ``get()`` and ``stop()``
+        """
+
+        self.dispatcher_manager = dispatcher_manager
+        self.ws = ws
+        self.running = running
+
+        self.q = asyncio.Queue(maxsize=MAX_QUEUE_SIZE)
+
+    async def destroy(self):
+        """Clear the run flag and close the websocket if still open."""
+
+        self.running.stop()
+
+        if self.ws:
+            await self.ws.close()
+
+    async def receive(self, msg):
+        """Validate one websocket message and queue it for ``run``.
+
+        The JSON payload must contain ``id``, ``service`` and ``request``;
+        ``flow`` is optional.  Any failure is reported on the socket as
+        ``{"error": ...}`` rather than raised.
+        """
+
+        try:
+
+            data = msg.json()
+
+            # "service" is validated too, so that omitting it yields the
+            # same "Bad message" error as the other mandatory fields
+            # instead of a bare KeyError text.
+            for field in ("request", "id", "service"):
+                if field not in data:
+                    raise RuntimeError("Bad message")
+
+            await self.q.put((
+                data["id"], data.get("flow"),
+                data["service"],
+                data["request"]
+            ))
+
+        except Exception as e:
+            print("receive exception:", str(e), flush=True)
+            await self.ws.send_json({"error": str(e)})
+
+    async def maybe_tidy_workers(self, workers):
+        """Drop finished tasks from the front of ``workers``.
+
+        Waits at most WORKER_CLOSE_WAIT seconds for the oldest task and
+        stops at the first one that is still running.  A no-op on an
+        empty list.  An exception raised by a finished task propagates
+        to the caller.
+        """
+
+        while workers:
+
+            try:
+
+                # shield() stops the wait_for timeout from cancelling the
+                # worker itself
+                await asyncio.wait_for(
+                    asyncio.shield(workers[0]),
+                    WORKER_CLOSE_WAIT
+                )
+
+            # asyncio.TimeoutError is only an alias of the builtin
+            # TimeoutError from Python 3.11 onwards
+            except asyncio.TimeoutError:
+                # workers[0] still running, move on
+                break
+
+            # workers[0] has finished; loop round to try the next one
+            workers.pop(0)
+
+    async def start_request_task(self, ws, id, flow, svc, request, workers):
+        """Start a task serving one request and add it to ``workers``.
+
+        Blocks while MAX_OUTSTANDING_REQUESTS tasks are already in
+        flight.  ``ws`` is accepted for interface compatibility; replies
+        are written to ``self.ws``.
+        """
+
+        # Wait for outstanding requests to go below MAX_OUTSTANDING_REQUESTS
+        while len(workers) >= MAX_OUTSTANDING_REQUESTS:
+
+            # Fixes deadlock
+            # FIXME: Put it in its own loop
+            await asyncio.sleep(START_REQUEST_WAIT)
+
+            await self.maybe_tidy_workers(workers)
+
+        async def responder(resp, fin):
+            await self.ws.send_json({
+                "id": id,
+                "response": resp,
+                "complete": fin,
+            })
+
+        worker = asyncio.create_task(
+            self.request_task(request, responder, flow, svc, id)
+        )
+
+        workers.append(worker)
+
+    async def request_task(self, request, responder, flow, svc, id=None):
+        """Serve one request through the dispatcher manager.
+
+        Requests naming a flow go to ``invoke_flow_service``, the rest to
+        ``invoke_global_service``.  Failures are sent to the socket as an
+        error message that carries the request ``id`` when one is given,
+        so the client can tell which request failed.
+        """
+
+        try:
+
+            if flow:
+
+                await self.dispatcher_manager.invoke_flow_service(
+                    request, responder, flow, svc
+                )
+
+            else:
+
+                await self.dispatcher_manager.invoke_global_service(
+                    request, responder, svc
+                )
+
+        except Exception as e:
+            error = {"error": str(e)}
+            if id is not None:
+                error["id"] = id
+            await self.ws.send_json(error)
+
+    async def run(self):
+        """Start a task for each queued request until the run flag clears.
+
+        On exit, for whatever reason, the run flag is cleared and the
+        websocket is closed and dropped.
+        """
+
+        # Worker tasks, servicing outstanding requests
+        workers = []
+
+        try:
+
+            while self.running.get():
+
+                try:
+
+                    if workers:
+                        await self.maybe_tidy_workers(workers)
+
+                    # Get next request on queue
+                    item = await asyncio.wait_for(self.q.get(), 1)
+                    request_id, flow, svc, request = item
+
+                except asyncio.TimeoutError:
+                    continue
+
+                except Exception as e:
+                    # This is an internal working error, may not be
+                    # recoverable.  No request id is known at this point.
+                    print("run prepare exception:", e)
+                    await self.ws.send_json({"error": str(e)})
+                    break
+
+                try:
+                    print(request_id, svc, request)
+
+                    await self.start_request_task(
+                        self.ws, request_id, flow, svc, request, workers
+                    )
+
+                except Exception as e:
+                    print("Exception2:", e)
+                    await self.ws.send_json(
+                        {"id": request_id, "error": str(e)}
+                    )
+
+        finally:
+
+            self.running.stop()
+
+            if self.ws:
+                # close() is a coroutine and must be awaited to take effect
+                await self.ws.close()
+                self.ws = None
+
diff --git a/trustgraph-flow/trustgraph/gateway/prompt.py b/trustgraph-flow/trustgraph/gateway/dispatch/prompt.py
similarity index 69%
rename from trustgraph-flow/trustgraph/gateway/prompt.py
rename to trustgraph-flow/trustgraph/gateway/dispatch/prompt.py
index eb50ac73..496d01e5 100644
--- a/trustgraph-flow/trustgraph/gateway/prompt.py
+++ b/trustgraph-flow/trustgraph/gateway/dispatch/prompt.py
@@ -1,22 +1,24 @@
import json
-from .. schema import PromptRequest, PromptResponse
-from .. schema import prompt_request_queue
-from .. schema import prompt_response_queue
+from ... schema import PromptRequest, PromptResponse
-from . endpoint import ServiceEndpoint
from . requestor import ServiceRequestor
class PromptRequestor(ServiceRequestor):
- def __init__(self, pulsar_client, timeout, auth):
+ def __init__(
+ self, pulsar_client, request_queue, response_queue, timeout,
+ consumer, subscriber,
+ ):
super(PromptRequestor, self).__init__(
pulsar_client=pulsar_client,
- request_queue=prompt_request_queue,
- response_queue=prompt_response_queue,
+ request_queue=request_queue,
+ response_queue=response_queue,
request_schema=PromptRequest,
response_schema=PromptResponse,
+ subscription = subscriber,
+ consumer_name = consumer,
timeout=timeout,
)
diff --git a/trustgraph-flow/trustgraph/gateway/requestor.py b/trustgraph-flow/trustgraph/gateway/dispatch/requestor.py
similarity index 71%
rename from trustgraph-flow/trustgraph/gateway/requestor.py
rename to trustgraph-flow/trustgraph/gateway/dispatch/requestor.py
index dc74667d..b8a84644 100644
--- a/trustgraph-flow/trustgraph/gateway/requestor.py
+++ b/trustgraph-flow/trustgraph/gateway/dispatch/requestor.py
@@ -1,11 +1,10 @@
import asyncio
-from pulsar.schema import JsonSchema
import uuid
import logging
-from .. base import Publisher
-from .. base import Subscriber
+from ... base import Publisher
+from ... base import Subscriber
logger = logging.getLogger("requestor")
logger.setLevel(logging.INFO)
@@ -23,21 +22,28 @@ class ServiceRequestor:
self.pub = Publisher(
pulsar_client, request_queue,
- schema=JsonSchema(request_schema),
+ schema=request_schema,
)
self.sub = Subscriber(
pulsar_client, response_queue,
subscription, consumer_name,
- JsonSchema(response_schema)
+ response_schema
)
self.timeout = timeout
- async def start(self):
+ self.running = True
- self.pub.start()
- self.sub.start()
+ async def start(self):
+ self.running = True
+ await self.sub.start()
+ await self.pub.start()
+
+ async def stop(self):
+ await self.pub.stop()
+ await self.sub.stop()
+ self.running = False
def to_request(self, request):
raise RuntimeError("Not defined")
@@ -51,20 +57,18 @@ class ServiceRequestor:
try:
- q = self.sub.subscribe(id)
+ q = await self.sub.subscribe(id)
- await asyncio.to_thread(
- self.pub.send, id, self.to_request(request)
- )
+ await self.pub.send(id, self.to_request(request))
- while True:
+ while self.running:
try:
- resp = await asyncio.to_thread(
- q.get,
- timeout=self.timeout
+ resp = await asyncio.wait_for(
+ q.get(), timeout=self.timeout
)
except Exception as e:
+ print("Exception", e)
raise RuntimeError("Timeout")
if resp.error:
@@ -78,7 +82,7 @@ class ServiceRequestor:
resp, fin = self.from_response(resp)
- print(resp, fin)
+ print(resp, fin, flush=True)
if responder:
await responder(resp, fin)
@@ -99,5 +103,5 @@ class ServiceRequestor:
return err
finally:
- self.sub.unsubscribe(id)
+ await self.sub.unsubscribe(id)
diff --git a/trustgraph-flow/trustgraph/gateway/sender.py b/trustgraph-flow/trustgraph/gateway/dispatch/sender.py
similarity index 69%
rename from trustgraph-flow/trustgraph/gateway/sender.py
rename to trustgraph-flow/trustgraph/gateway/dispatch/sender.py
index 32c586b1..2435cdc1 100644
--- a/trustgraph-flow/trustgraph/gateway/sender.py
+++ b/trustgraph-flow/trustgraph/gateway/dispatch/sender.py
@@ -2,11 +2,10 @@
# Like ServiceRequestor, but just fire-and-forget instead of request/response
import asyncio
-from pulsar.schema import JsonSchema
import uuid
import logging
-from .. base import Publisher
+from ... base import Publisher
logger = logging.getLogger("sender")
logger.setLevel(logging.INFO)
@@ -16,17 +15,19 @@ class ServiceSender:
def __init__(
self,
pulsar_client,
- request_queue, request_schema,
+ queue, schema,
):
self.pub = Publisher(
- pulsar_client, request_queue,
- schema=JsonSchema(request_schema),
+ pulsar_client, queue,
+ schema=schema,
)
async def start(self):
+ await self.pub.start()
- self.pub.start()
+ async def stop(self):
+ await self.pub.stop()
def to_request(self, request):
raise RuntimeError("Not defined")
@@ -35,13 +36,13 @@ class ServiceSender:
try:
- await asyncio.to_thread(
- self.pub.send, None, self.to_request(request)
- )
+ await self.pub.send(None, self.to_request(request))
if responder:
await responder({}, True)
+ return {}
+
except Exception as e:
logging.error(f"Exception: {e}")
diff --git a/trustgraph-flow/trustgraph/gateway/serialize.py b/trustgraph-flow/trustgraph/gateway/dispatch/serialize.py
similarity index 78%
rename from trustgraph-flow/trustgraph/gateway/serialize.py
rename to trustgraph-flow/trustgraph/gateway/dispatch/serialize.py
index 5cc90a78..45ae55d7 100644
--- a/trustgraph-flow/trustgraph/gateway/serialize.py
+++ b/trustgraph-flow/trustgraph/gateway/dispatch/serialize.py
@@ -1,7 +1,7 @@
import base64
-from .. schema import Value, Triple, DocumentPackage, DocumentInfo
+from ... schema import Value, Triple, DocumentMetadata, ProcessingMetadata
def to_value(x):
return Value(value=x["v"], is_uri=x["e"])
@@ -80,88 +80,86 @@ def serialize_document_embeddings(message):
],
}
-def serialize_document_package(message):
+def serialize_document_metadata(message):
ret = {}
if message.id:
ret["id"] = message.id
- if message.metadata:
- ret["metadata"] = serialize_subgraph(message.metdata)
-
- if message.document:
- blob = base64.b64encode(
- message.document.encode("utf-8")
- ).decode("utf-8")
- ret["document"] = blob
+ if message.time:
+ ret["time"] = message.time
if message.kind:
ret["kind"] = message.kind
- if message.user:
- ret["user"] = message.user
-
- if message.collection:
- ret["collection"] = message.collection
-
- return ret
-
-def serialize_document_info(message):
-
- ret = {}
-
- if message.id:
- ret["id"] = message.id
-
- if message.kind:
- ret["kind"] = message.kind
-
- if message.user:
- ret["user"] = message.user
-
- if message.collection:
- ret["collection"] = message.collection
-
if message.title:
ret["title"] = message.title
if message.comments:
ret["comments"] = message.comments
- if message.time:
- ret["time"] = message.time
-
if message.metadata:
ret["metadata"] = serialize_subgraph(message.metadata)
+ if message.user:
+ ret["user"] = message.user
+
+ if message.tags is not None:
+ ret["tags"] = message.tags
+
return ret
-def to_document_package(x):
+def serialize_processing_metadata(message):
- return DocumentPackage(
+    ret = {}  # Only fields that are set on the message are emitted
+
+    if message.id:
+        ret["id"] = message.id
+
+    if message.document_id:  # guard on the field that is being emitted
+        ret["document-id"] = message.document_id
+
+    if message.time:
+        ret["time"] = message.time
+
+    if message.flow:
+        ret["flow"] = message.flow
+
+    if message.user:
+        ret["user"] = message.user
+
+    if message.collection:
+        ret["collection"] = message.collection
+
+    if message.tags is not None:  # an empty tag list is still emitted
+        ret["tags"] = message.tags
+
+    return ret
+
+def to_document_metadata(x):
+
+ return DocumentMetadata(
id = x.get("id", None),
+ time = x.get("time", None),
kind = x.get("kind", None),
- user = x.get("user", None),
- collection = x.get("collection", None),
title = x.get("title", None),
comments = x.get("comments", None),
- time = x.get("time", None),
- document = x.get("document", None),
metadata = to_subgraph(x["metadata"]),
+ user = x.get("user", None),
+ tags = x.get("tags", None),
)
-def to_document_info(x):
+def to_processing_metadata(x):
- return DocumentInfo(
+ return ProcessingMetadata(
id = x.get("id", None),
- kind = x.get("kind", None),
+ document_id = x.get("document-id", None),
+ time = x.get("time", None),
+ flow = x.get("flow", None),
user = x.get("user", None),
collection = x.get("collection", None),
- title = x.get("title", None),
- comments = x.get("comments", None),
- time = x.get("time", None),
- metadata = to_subgraph(x["metadata"]),
+ tags = x.get("tags", None),
)
def to_criteria(x):
@@ -169,3 +167,4 @@ def to_criteria(x):
Critera(v["key"], v["value"], v["operator"])
for v in x
]
+
diff --git a/trustgraph-flow/trustgraph/gateway/dispatch/streamer.py b/trustgraph-flow/trustgraph/gateway/dispatch/streamer.py
new file mode 100644
index 00000000..60d4aff2
--- /dev/null
+++ b/trustgraph-flow/trustgraph/gateway/dispatch/streamer.py
@@ -0,0 +1,121 @@
+
+import asyncio
+import uuid
+import logging
+
+from ... base import Publisher
+from ... base import Subscriber
+
+logger = logging.getLogger("requestor")
+logger.setLevel(logging.INFO)
+
+class ServiceRequestor:
+    """
+    Streams messages from a subscription to an async handler.
+
+    Subclasses override from_inbound() to turn an inbound message into a
+    (response, final) pair, which is then passed to the handler.
+    """
+
+    def __init__(
+            self,
+            pulsar_client,
+            queue, schema,
+            handler,
+            subscription="api-gateway", consumer_name="api-gateway",
+            timeout=600,
+    ):
+
+        self.sub = Subscriber(
+            pulsar_client, queue,
+            subscription, consumer_name,
+            schema
+        )
+
+        # Passed to asyncio.wait_for() as the per-message wait limit
+        self.timeout = timeout
+
+        self.running = True
+
+        # Awaited as receiver(response, final) for every message
+        self.receiver = handler
+
+    async def start(self):
+        """Start the subscriber and launch the streaming task."""
+        await self.sub.start()
+        # Set the flag first so that stream() always starts with it True
+        self.running = True
+        self.streamer = asyncio.create_task(self.stream())
+
+    async def stop(self):
+        """Stop the subscriber and tell the streaming loop to finish."""
+        await self.sub.stop()
+        self.running = False
+
+    def from_inbound(self, response):
+        """Convert an inbound message to (response, final). Override."""
+        raise RuntimeError("Not defined")
+
+    async def stream(self):
+        """
+        Relay inbound messages to the handler until a final one arrives.
+
+        Returns None when the loop ends normally.  On failure the error
+        is reported to the handler as a final message and also returned.
+        """
+
+        sub_id = str(uuid.uuid4())
+
+        try:
+
+            q = await self.sub.subscribe(sub_id)
+
+            while self.running:
+
+                try:
+                    resp = await asyncio.wait_for(
+                        q.get(), timeout=self.timeout
+                    )
+                except asyncio.TimeoutError as e:
+                    raise RuntimeError("Timeout") from e
+
+                if resp.error:
+
+                    # Error responses are relayed but do not end the stream
+                    err = { "error": {
+                        "type": resp.error.type,
+                        "message": resp.error.message,
+                    } }
+
+                    fin = False
+
+                    await self.receiver(err, fin)
+
+                else:
+
+                    resp, fin = self.from_inbound(resp)
+
+                    logger.debug("Inbound: %s final=%s", resp, fin)
+
+                    await self.receiver(resp, fin)
+
+                if fin: break
+
+        except Exception as e:
+
+            logger.error(f"Exception: {e}")
+
+            err = { "error": {
+                "type": "gateway-error",
+                "message": str(e),
+            } }
+
+            # There is no per-call responder here; failures go to the
+            # handler supplied at construction time
+            if self.receiver:
+                await self.receiver(err, True)
+
+            return err
+
+        finally:
+            await self.sub.unsubscribe(sub_id)
diff --git a/trustgraph-flow/trustgraph/gateway/text_completion.py b/trustgraph-flow/trustgraph/gateway/dispatch/text_completion.py
similarity index 60%
rename from trustgraph-flow/trustgraph/gateway/text_completion.py
rename to trustgraph-flow/trustgraph/gateway/dispatch/text_completion.py
index ec84e5d6..40ae7616 100644
--- a/trustgraph-flow/trustgraph/gateway/text_completion.py
+++ b/trustgraph-flow/trustgraph/gateway/dispatch/text_completion.py
@@ -1,20 +1,22 @@
-from .. schema import TextCompletionRequest, TextCompletionResponse
-from .. schema import text_completion_request_queue
-from .. schema import text_completion_response_queue
+from ... schema import TextCompletionRequest, TextCompletionResponse
-from . endpoint import ServiceEndpoint
from . requestor import ServiceRequestor
class TextCompletionRequestor(ServiceRequestor):
- def __init__(self, pulsar_client, timeout, auth):
+ def __init__(
+ self, pulsar_client, request_queue, response_queue, timeout,
+ consumer, subscriber,
+ ):
super(TextCompletionRequestor, self).__init__(
pulsar_client=pulsar_client,
- request_queue=text_completion_request_queue,
- response_queue=text_completion_response_queue,
+ request_queue=request_queue,
+ response_queue=response_queue,
request_schema=TextCompletionRequest,
response_schema=TextCompletionResponse,
+ subscription = subscriber,
+ consumer_name = consumer,
timeout=timeout,
)
diff --git a/trustgraph-flow/trustgraph/gateway/text_load.py b/trustgraph-flow/trustgraph/gateway/dispatch/text_load.py
similarity index 70%
rename from trustgraph-flow/trustgraph/gateway/text_load.py
rename to trustgraph-flow/trustgraph/gateway/dispatch/text_load.py
index cc432698..53ea7452 100644
--- a/trustgraph-flow/trustgraph/gateway/text_load.py
+++ b/trustgraph-flow/trustgraph/gateway/dispatch/text_load.py
@@ -1,19 +1,18 @@
import base64
-from .. schema import TextDocument, Metadata
-from .. schema import text_ingest_queue
+from ... schema import TextDocument, Metadata
from . sender import ServiceSender
from . serialize import to_subgraph
-class TextLoadSender(ServiceSender):
- def __init__(self, pulsar_client):
+class TextLoad(ServiceSender):
+ def __init__(self, pulsar_client, queue):
- super(TextLoadSender, self).__init__(
- pulsar_client=pulsar_client,
- request_queue=text_ingest_queue,
- request_schema=TextDocument,
+ super(TextLoad, self).__init__(
+ pulsar_client = pulsar_client,
+ queue = queue,
+ schema = TextDocument,
)
def to_request(self, body):
diff --git a/trustgraph-flow/trustgraph/gateway/dispatch/triples_export.py b/trustgraph-flow/trustgraph/gateway/dispatch/triples_export.py
new file mode 100644
index 00000000..d065550e
--- /dev/null
+++ b/trustgraph-flow/trustgraph/gateway/dispatch/triples_export.py
@@ -0,0 +1,87 @@
+
+import asyncio
+import queue
+import uuid
+
+from ... schema import Triples
+from ... base import Subscriber
+
+from . serialize import serialize_triples
+
+class TriplesExport:
+    """Relays Triples messages from a subscription to a websocket."""
+
+    def __init__(
+            self, ws, running, pulsar_client, queue, consumer, subscriber
+    ):
+
+        self.ws = ws
+        self.running = running
+        self.pulsar_client = pulsar_client
+        self.queue = queue
+        self.consumer = consumer
+        self.subscriber = subscriber
+
+    async def destroy(self):
+        """Ask the run loop to stop and close the websocket."""
+        self.running.stop()
+        await self.ws.close()
+
+    async def receive(self, msg):
+        # Ignore incoming info from websocket
+        pass
+
+    async def run(self):
+        """
+        Forward messages to the websocket until the running flag clears.
+
+        The subscription, subscriber and websocket are released in a
+        finally block so that they are also cleaned up when this task is
+        cancelled, not only when the loop ends by itself.
+        """
+
+        subs = Subscriber(
+            client = self.pulsar_client, topic = self.queue,
+            consumer_name = self.consumer, subscription = self.subscriber,
+            schema = Triples
+        )
+
+        await subs.start()
+
+        sub_id = str(uuid.uuid4())
+        q = None
+
+        try:
+
+            q = await subs.subscribe_all(sub_id)
+
+            while self.running.get():
+                try:
+
+                    resp = await asyncio.wait_for(q.get(), timeout=0.5)
+                    await self.ws.send_json(serialize_triples(resp))
+
+                except asyncio.TimeoutError:
+                    # Nothing arrived; go round again to re-check the flag
+                    continue
+
+                except queue.Empty:
+                    continue
+
+                except Exception as e:
+                    print(f"Exception: {str(e)}", flush=True)
+                    break
+
+        finally:
+
+            # Clear the flag first so that anything else watching it can
+            # stop even if the cleanup below raises
+            self.running.stop()
+
+            # Only unsubscribe if subscribe_all() actually succeeded
+            if q is not None:
+                await subs.unsubscribe_all(sub_id)
+
+            await subs.stop()
+
+            await self.ws.close()
diff --git a/trustgraph-flow/trustgraph/gateway/dispatch/triples_import.py b/trustgraph-flow/trustgraph/gateway/dispatch/triples_import.py
new file mode 100644
index 00000000..9b59a0ed
--- /dev/null
+++ b/trustgraph-flow/trustgraph/gateway/dispatch/triples_import.py
@@ -0,0 +1,68 @@
+
+import asyncio
+import uuid
+from aiohttp import WSMsgType
+
+from ... schema import Metadata
+from ... schema import Triples
+from ... base import Publisher
+
+from . serialize import to_subgraph
+
+class TriplesImport:
+    """Publishes Triples messages built from inbound websocket JSON."""
+
+    def __init__(
+            self, ws, running, pulsar_client, queue
+    ):
+
+        self.ws = ws
+        self.running = running
+
+        # NOTE(review): nothing in this class calls publisher.start();
+        # confirm that whoever constructs this object starts it
+        self.publisher = Publisher(
+            pulsar_client, topic = queue, schema = Triples
+        )
+
+    async def destroy(self):
+        """Clear the running flag, close the socket, stop the publisher."""
+
+        self.running.stop()
+
+        ws = self.ws
+        if ws:
+            await ws.close()
+
+        await self.publisher.stop()
+
+    async def receive(self, msg):
+        """Decode one websocket message as JSON and publish it as Triples."""
+
+        payload = msg.json()
+        meta = payload["metadata"]
+
+        envelope = Metadata(
+            id=meta["id"],
+            metadata=to_subgraph(meta["metadata"]),
+            user=meta["user"],
+            collection=meta["collection"],
+        )
+
+        elt = Triples(
+            metadata=envelope,
+            triples=to_subgraph(payload["triples"]),
+        )
+
+        await self.publisher.send(None, elt)
+
+    async def run(self):
+        """Idle until the running flag clears, then close the websocket."""
+
+        while self.running.get():
+            await asyncio.sleep(0.5)
+
+        if self.ws:
+            await self.ws.close()
+
+        self.ws = None
diff --git a/trustgraph-flow/trustgraph/gateway/triples_query.py b/trustgraph-flow/trustgraph/gateway/dispatch/triples_query.py
similarity index 75%
rename from trustgraph-flow/trustgraph/gateway/triples_query.py
rename to trustgraph-flow/trustgraph/gateway/dispatch/triples_query.py
index 061bd4d8..5db84abd 100644
--- a/trustgraph-flow/trustgraph/gateway/triples_query.py
+++ b/trustgraph-flow/trustgraph/gateway/dispatch/triples_query.py
@@ -1,21 +1,23 @@
-from .. schema import TriplesQueryRequest, TriplesQueryResponse, Triples
-from .. schema import triples_request_queue
-from .. schema import triples_response_queue
+from ... schema import TriplesQueryRequest, TriplesQueryResponse, Triples
-from . endpoint import ServiceEndpoint
from . requestor import ServiceRequestor
from . serialize import to_value, serialize_subgraph
class TriplesQueryRequestor(ServiceRequestor):
- def __init__(self, pulsar_client, timeout, auth):
+ def __init__(
+ self, pulsar_client, request_queue, response_queue, timeout,
+ consumer, subscriber,
+ ):
super(TriplesQueryRequestor, self).__init__(
pulsar_client=pulsar_client,
- request_queue=triples_request_queue,
- response_queue=triples_response_queue,
+ request_queue=request_queue,
+ response_queue=response_queue,
request_schema=TriplesQueryRequest,
response_schema=TriplesQueryResponse,
+ subscription = subscriber,
+ consumer_name = consumer,
timeout=timeout,
)
diff --git a/trustgraph-flow/trustgraph/gateway/document_embeddings_load.py b/trustgraph-flow/trustgraph/gateway/document_embeddings_load.py
deleted file mode 100644
index 6b4b4838..00000000
--- a/trustgraph-flow/trustgraph/gateway/document_embeddings_load.py
+++ /dev/null
@@ -1,64 +0,0 @@
-
-import asyncio
-from pulsar.schema import JsonSchema
-import uuid
-from aiohttp import WSMsgType
-
-from .. schema import Metadata
-from .. schema import DocumentEmbeddings, ChunkEmbeddings
-from .. schema import document_embeddings_store_queue
-from .. base import Publisher
-
-from . socket import SocketEndpoint
-from . serialize import to_subgraph
-
-class DocumentEmbeddingsLoadEndpoint(SocketEndpoint):
-
- def __init__(
- self, pulsar_client, auth, path="/api/v1/load/document-embeddings",
- ):
-
- super(DocumentEmbeddingsLoadEndpoint, self).__init__(
- endpoint_path=path, auth=auth,
- )
-
- self.pulsar_client=pulsar_client
-
- self.publisher = Publisher(
- self.pulsar_client, document_embeddings_store_queue,
- schema=JsonSchema(DocumentEmbeddings)
- )
-
- async def start(self):
-
- self.publisher.start()
-
- async def listener(self, ws, running):
-
- async for msg in ws:
- # On error, finish
- if msg.type == WSMsgType.ERROR:
- break
- else:
-
- data = msg.json()
-
- elt = DocumentEmbeddings(
- metadata=Metadata(
- id=data["metadata"]["id"],
- metadata=to_subgraph(data["metadata"]["metadata"]),
- user=data["metadata"]["user"],
- collection=data["metadata"]["collection"],
- ),
- chunks=[
- ChunkEmbeddings(
- chunk=de["chunk"].encode("utf-8"),
- vectors=de["vectors"],
- )
- for de in data["chunks"]
- ],
- )
-
- self.publisher.send(None, elt)
-
- running.stop()
diff --git a/trustgraph-flow/trustgraph/gateway/document_embeddings_stream.py b/trustgraph-flow/trustgraph/gateway/document_embeddings_stream.py
deleted file mode 100644
index 6d7db576..00000000
--- a/trustgraph-flow/trustgraph/gateway/document_embeddings_stream.py
+++ /dev/null
@@ -1,73 +0,0 @@
-
-import asyncio
-import queue
-from pulsar.schema import JsonSchema
-import uuid
-
-from .. schema import DocumentEmbeddings
-from .. schema import document_embeddings_store_queue
-from .. base import Subscriber
-
-from . socket import SocketEndpoint
-from . serialize import serialize_document_embeddings
-
-class DocumentEmbeddingsStreamEndpoint(SocketEndpoint):
-
- def __init__(
- self, pulsar_client, auth,
- path="/api/v1/stream/document-embeddings"
- ):
-
- super(DocumentEmbeddingsStreamEndpoint, self).__init__(
- endpoint_path=path, auth=auth,
- )
-
- self.pulsar_client=pulsar_client
-
- self.subscriber = Subscriber(
- self.pulsar_client, document_embeddings_store_queue,
- "api-gateway", "api-gateway",
- schema=JsonSchema(DocumentEmbeddings),
- )
-
- async def listener(self, ws, running):
-
- worker = asyncio.create_task(
- self.async_thread(ws, running)
- )
-
- await super(DocumentEmbeddingsStreamEndpoint, self).listener(
- ws, running
- )
-
- await worker
-
- async def start(self):
-
- self.subscriber.start()
-
- async def async_thread(self, ws, running):
-
- id = str(uuid.uuid4())
-
- q = self.subscriber.subscribe_all(id)
-
- while running.get():
- try:
- resp = await asyncio.to_thread(q.get, timeout=0.5)
- await ws.send_json(serialize_document_embeddings(resp))
-
- except TimeoutError:
- continue
-
- except queue.Empty:
- continue
-
- except Exception as e:
- print(f"Exception: {str(e)}", flush=True)
- break
-
- self.subscriber.unsubscribe_all(id)
-
- running.stop()
-
diff --git a/trustgraph-flow/trustgraph/gateway/endpoint.py b/trustgraph-flow/trustgraph/gateway/endpoint/constant_endpoint.py
similarity index 78%
rename from trustgraph-flow/trustgraph/gateway/endpoint.py
rename to trustgraph-flow/trustgraph/gateway/endpoint/constant_endpoint.py
index 5005463c..eda9da4d 100644
--- a/trustgraph-flow/trustgraph/gateway/endpoint.py
+++ b/trustgraph-flow/trustgraph/gateway/endpoint/constant_endpoint.py
@@ -1,29 +1,25 @@
import asyncio
-from pulsar.schema import JsonSchema
from aiohttp import web
import uuid
import logging
-from .. base import Publisher
-from .. base import Subscriber
-
logger = logging.getLogger("endpoint")
logger.setLevel(logging.INFO)
-class ServiceEndpoint:
+class ConstantEndpoint:
- def __init__(self, endpoint_path, auth, requestor):
+ def __init__(self, endpoint_path, auth, dispatcher):
self.path = endpoint_path
self.auth = auth
self.operation = "service"
- self.requestor = requestor
+ self.dispatcher = dispatcher
async def start(self):
- await self.requestor.start()
+ pass
def add_routes(self, app):
@@ -56,7 +52,7 @@ class ServiceEndpoint:
async def responder(x, fin):
print(x)
- resp = await self.requestor.process(data, responder)
+ resp = await self.dispatcher.process(data, responder)
return web.json_response(resp)
diff --git a/trustgraph-flow/trustgraph/gateway/endpoint/manager.py b/trustgraph-flow/trustgraph/gateway/endpoint/manager.py
new file mode 100644
index 00000000..75a39766
--- /dev/null
+++ b/trustgraph-flow/trustgraph/gateway/endpoint/manager.py
@@ -0,0 +1,76 @@
+
+import asyncio
+
+from aiohttp import web
+
+from . constant_endpoint import ConstantEndpoint
+from . variable_endpoint import VariableEndpoint
+from . socket import SocketEndpoint
+from . metrics import MetricsEndpoint
+
+from .. dispatch.manager import DispatcherManager
+
+class EndpointManager:
+    """Builds the gateway's endpoints and registers/starts them as a set."""
+
+    def __init__(
+            self, dispatcher_manager, auth, prometheus_url, timeout=600
+    ):
+
+        self.dispatcher_manager = dispatcher_manager
+        self.timeout = timeout
+
+        self.services = {}
+
+        # (endpoint class, route path, dispatcher factory), listed in the
+        # order in which the endpoints are created
+        routed = [
+            (
+                VariableEndpoint, "/api/v1/{kind}",
+                dispatcher_manager.dispatch_global_service,
+            ),
+            (
+                SocketEndpoint, "/api/v1/socket",
+                dispatcher_manager.dispatch_socket,
+            ),
+            (
+                VariableEndpoint, "/api/v1/flow/{flow}/service/{kind}",
+                dispatcher_manager.dispatch_flow_service,
+            ),
+            (
+                SocketEndpoint, "/api/v1/flow/{flow}/import/{kind}",
+                dispatcher_manager.dispatch_flow_import,
+            ),
+            (
+                SocketEndpoint, "/api/v1/flow/{flow}/export/{kind}",
+                dispatcher_manager.dispatch_flow_export,
+            ),
+        ]
+
+        endpoints = [
+            MetricsEndpoint(
+                endpoint_path = "/api/metrics",
+                prometheus_url = prometheus_url,
+                auth = auth,
+            ),
+        ]
+
+        for cls, path, make_dispatcher in routed:
+            endpoints.append(
+                cls(
+                    endpoint_path = path, auth = auth,
+                    dispatcher = make_dispatcher(),
+                )
+            )
+
+        self.endpoints = endpoints
+
+    def add_routes(self, app):
+        """Register the routes of every endpoint on the application."""
+        for ep in self.endpoints:
+            ep.add_routes(app)
+
+    async def start(self):
+        """Start the endpoints one after another, in creation order."""
+        for ep in self.endpoints:
+            await ep.start()
diff --git a/trustgraph-flow/trustgraph/gateway/metrics.py b/trustgraph-flow/trustgraph/gateway/endpoint/metrics.py
similarity index 97%
rename from trustgraph-flow/trustgraph/gateway/metrics.py
rename to trustgraph-flow/trustgraph/gateway/endpoint/metrics.py
index 33c1fe3a..d8a1ef62 100644
--- a/trustgraph-flow/trustgraph/gateway/metrics.py
+++ b/trustgraph-flow/trustgraph/gateway/endpoint/metrics.py
@@ -7,7 +7,6 @@
import aiohttp
from aiohttp import web
import asyncio
-from pulsar.schema import JsonSchema
import uuid
import logging
diff --git a/trustgraph-flow/trustgraph/gateway/endpoint/socket.py b/trustgraph-flow/trustgraph/gateway/endpoint/socket.py
new file mode 100644
index 00000000..1bfec637
--- /dev/null
+++ b/trustgraph-flow/trustgraph/gateway/endpoint/socket.py
@@ -0,0 +1,132 @@
+
+import asyncio
+from aiohttp import web, WSMsgType
+import logging
+
+from .. running import Running
+
+logger = logging.getLogger("socket")
+logger.setLevel(logging.INFO)
+
+class SocketEndpoint:
+    """
+    Websocket endpoint which hands each connection to a dispatcher.
+
+    The dispatcher argument is an async factory, awaited once per
+    connection as dispatcher(ws, running, match_info).  The object it
+    returns is expected to provide async run(), receive(msg), destroy().
+    """
+
+    def __init__(
+            self, endpoint_path, auth, dispatcher,
+    ):
+
+        self.path = endpoint_path
+        self.auth = auth
+        self.operation = "socket"
+
+        self.dispatcher = dispatcher
+
+        # Running flags of the connections currently being served, so
+        # that stop() has something to act on
+        self.active = []
+
+    async def worker(self, ws, dispatcher, running):
+        """Run the dispatcher's own loop for this connection."""
+
+        await dispatcher.run()
+
+    async def listener(self, ws, dispatcher, running):
+        """Feed inbound text and binary frames to the dispatcher."""
+
+        async for msg in ws:
+
+            # Any other frame type (error, close, ...) ends the session
+            if msg.type not in (WSMsgType.TEXT, WSMsgType.BINARY):
+                break
+
+            await dispatcher.receive(msg)
+
+        running.stop()
+        await ws.close()
+
+    async def handle(self, request):
+        """
+        Authorise the request, upgrade it to a websocket and serve it.
+
+        Returns the websocket response, or HTTPUnauthorized if the token
+        in the query string is not permitted.
+        """
+
+        # A missing token is treated as an empty one
+        token = request.query.get("token", "")
+
+        if not self.auth.permitted(token, self.operation):
+            return web.HTTPUnauthorized()
+
+        # 50MB max message size
+        ws = web.WebSocketResponse(max_msg_size=52428800)
+
+        await ws.prepare(request)
+
+        running = Running()
+        self.active.append(running)
+
+        dispatcher = None
+
+        try:
+
+            dispatcher = await self.dispatcher(
+                ws, running, request.match_info
+            )
+
+            async with asyncio.TaskGroup() as tg:
+
+                tg.create_task(self.worker(ws, dispatcher, running))
+                tg.create_task(self.listener(ws, dispatcher, running))
+
+                print("Created taskgroup, waiting...")
+
+                # Leaving the block waits for both tasks to complete
+
+            print("Task group closed")
+
+        except ExceptionGroup as e:
+
+            print("Exception group:", flush=True)
+
+            for se in e.exceptions:
+                print(" Type:", type(se), flush=True)
+                print(f" Exception: {se}", flush=True)
+
+        except Exception as e:
+            print("Socket exception:", e, flush=True)
+
+        finally:
+
+            self.active.remove(running)
+
+            # Release the dispatcher on failure as well as on success
+            if dispatcher is not None:
+                try:
+                    await dispatcher.destroy()
+                except Exception as e:
+                    print("Socket exception:", e, flush=True)
+
+        await ws.close()
+
+        return ws
+
+    async def start(self):
+        pass
+
+    async def stop(self):
+        """Clear the running flag of every connection being served."""
+        for running in list(self.active):
+            running.stop()
+
+    def add_routes(self, app):
+
+        app.add_routes([
+            web.get(self.path, self.handle),
+        ])
diff --git a/trustgraph-flow/trustgraph/gateway/endpoint/variable_endpoint.py b/trustgraph-flow/trustgraph/gateway/endpoint/variable_endpoint.py
new file mode 100644
index 00000000..8813165c
--- /dev/null
+++ b/trustgraph-flow/trustgraph/gateway/endpoint/variable_endpoint.py
@@ -0,0 +1,80 @@
+
+import asyncio
+from aiohttp import web
+import uuid
+import logging
+
+logger = logging.getLogger("endpoint")
+logger.setLevel(logging.INFO)
+
+class VariableEndpoint:
+    """
+    HTTP POST endpoint whose path contains variables.
+
+    The JSON body, a responder and the matched path variables are handed
+    to dispatcher.process(); its result is returned as the JSON response.
+    """
+
+    def __init__(self, endpoint_path, auth, dispatcher):
+
+        self.path = endpoint_path
+
+        self.auth = auth
+        self.operation = "service"
+
+        self.dispatcher = dispatcher
+
+    async def start(self):
+        pass
+
+    def add_routes(self, app):
+
+        app.add_routes([
+            web.post(self.path, self.handle),
+        ])
+
+    async def handle(self, request):
+        """
+        Authorise and dispatch one request.
+
+        A failure while handling the request is returned to the client as
+        a JSON object with an "error" key rather than raised.
+        """
+
+        print(request.path, "...")
+
+        try:
+            ht = request.headers["Authorization"]
+            tokens = ht.split(" ", 2)
+            if tokens[0] != "Bearer":
+                return web.HTTPUnauthorized()
+            token = tokens[1]
+        except (KeyError, IndexError):
+            # No header, or no token after the scheme: use an empty token
+            # and let the authenticator decide
+            token = ""
+
+        if not self.auth.permitted(token, self.operation):
+            return web.HTTPUnauthorized()
+
+        try:
+
+            data = await request.json()
+
+            print(data)
+
+            async def responder(x, fin):
+                print(x)
+
+            resp = await self.dispatcher.process(
+                data, responder, request.match_info
+            )
+
+            return web.json_response(resp)
+
+        except Exception as e:
+            logger.error(f"Exception: {e}")
+
+            return web.json_response(
+                { "error": str(e) }
+            )
diff --git a/trustgraph-flow/trustgraph/gateway/graph_embeddings_load.py b/trustgraph-flow/trustgraph/gateway/graph_embeddings_load.py
deleted file mode 100644
index c1354ce5..00000000
--- a/trustgraph-flow/trustgraph/gateway/graph_embeddings_load.py
+++ /dev/null
@@ -1,65 +0,0 @@
-
-import asyncio
-from pulsar.schema import JsonSchema
-import uuid
-from aiohttp import WSMsgType
-
-from .. schema import Metadata
-from .. schema import GraphEmbeddings, EntityEmbeddings
-from .. schema import graph_embeddings_store_queue
-from .. base import Publisher
-
-from . socket import SocketEndpoint
-from . serialize import to_subgraph, to_value
-
-class GraphEmbeddingsLoadEndpoint(SocketEndpoint):
-
- def __init__(
- self, pulsar_client, auth, path="/api/v1/load/graph-embeddings",
- ):
-
- super(GraphEmbeddingsLoadEndpoint, self).__init__(
- endpoint_path=path, auth=auth,
- )
-
- self.pulsar_client=pulsar_client
-
- self.publisher = Publisher(
- self.pulsar_client, graph_embeddings_store_queue,
- schema=JsonSchema(GraphEmbeddings)
- )
-
- async def start(self):
-
- self.publisher.start()
-
- async def listener(self, ws, running):
-
- async for msg in ws:
-
- # On error, finish
- if msg.type == WSMsgType.ERROR:
- break
- else:
-
- data = msg.json()
-
- elt = GraphEmbeddings(
- metadata=Metadata(
- id=data["metadata"]["id"],
- metadata=to_subgraph(data["metadata"]["metadata"]),
- user=data["metadata"]["user"],
- collection=data["metadata"]["collection"],
- ),
- entities=[
- EntityEmbeddings(
- entity=to_value(ent["entity"]),
- vectors=ent["vectors"],
- )
- for ent in data["entities"]
- ]
- )
-
- self.publisher.send(None, elt)
-
- running.stop()
diff --git a/trustgraph-flow/trustgraph/gateway/graph_embeddings_stream.py b/trustgraph-flow/trustgraph/gateway/graph_embeddings_stream.py
deleted file mode 100644
index 385eb9f4..00000000
--- a/trustgraph-flow/trustgraph/gateway/graph_embeddings_stream.py
+++ /dev/null
@@ -1,70 +0,0 @@
-
-import asyncio
-import queue
-from pulsar.schema import JsonSchema
-import uuid
-
-from .. schema import GraphEmbeddings
-from .. schema import graph_embeddings_store_queue
-from .. base import Subscriber
-
-from . socket import SocketEndpoint
-from . serialize import serialize_graph_embeddings
-
-class GraphEmbeddingsStreamEndpoint(SocketEndpoint):
-
- def __init__(
- self, pulsar_client, auth, path="/api/v1/stream/graph-embeddings"
- ):
-
- super(GraphEmbeddingsStreamEndpoint, self).__init__(
- endpoint_path=path, auth=auth,
- )
-
- self.pulsar_client=pulsar_client
-
- self.subscriber = Subscriber(
- self.pulsar_client, graph_embeddings_store_queue,
- "api-gateway", "api-gateway",
- schema=JsonSchema(GraphEmbeddings)
- )
-
- async def listener(self, ws, running):
-
- worker = asyncio.create_task(
- self.async_thread(ws, running)
- )
-
- await super(GraphEmbeddingsStreamEndpoint, self).listener(ws, running)
-
- await worker
-
- async def start(self):
-
- self.subscriber.start()
-
- async def async_thread(self, ws, running):
-
- id = str(uuid.uuid4())
-
- q = self.subscriber.subscribe_all(id)
-
- while running.get():
- try:
- resp = await asyncio.to_thread(q.get, timeout=0.5)
- await ws.send_json(serialize_graph_embeddings(resp))
-
- except TimeoutError:
- continue
-
- except queue.Empty:
- continue
-
- except Exception as e:
- print(f"Exception: {str(e)}", flush=True)
- break
-
- self.subscriber.unsubscribe_all(id)
-
- running.stop()
-
diff --git a/trustgraph-flow/trustgraph/gateway/librarian.py b/trustgraph-flow/trustgraph/gateway/librarian.py
deleted file mode 100644
index e6ff7ce3..00000000
--- a/trustgraph-flow/trustgraph/gateway/librarian.py
+++ /dev/null
@@ -1,62 +0,0 @@
-
-from .. schema import LibrarianRequest, LibrarianResponse, Triples
-from .. schema import librarian_request_queue
-from .. schema import librarian_response_queue
-
-from . endpoint import ServiceEndpoint
-from . requestor import ServiceRequestor
-from . serialize import serialize_document_package, serialize_document_info
-from . serialize import to_document_package, to_document_info, to_criteria
-
-class LibrarianRequestor(ServiceRequestor):
- def __init__(self, pulsar_client, timeout, auth):
-
- super(LibrarianRequestor, self).__init__(
- pulsar_client=pulsar_client,
- request_queue=librarian_request_queue,
- response_queue=librarian_response_queue,
- request_schema=LibrarianRequest,
- response_schema=LibrarianResponse,
- timeout=timeout,
- )
-
- def to_request(self, body):
-
- print("TRR")
- if "document" in body:
- dp = to_document_package(body["document"])
- else:
- dp = None
-
- print("GOT")
- if "criteria" in body:
- criteria = to_criteria(body["criteria"])
- else:
- criteria = None
-
- print("ASLDKJ")
-
- return LibrarianRequest(
- operation = body.get("operation", None),
- id = body.get("id", None),
- document = dp,
- user = body.get("user", None),
- collection = body.get("collection", None),
- criteria = criteria,
- )
-
- def from_response(self, message):
-
- response = {}
-
- if message.document:
- response["document"] = serialize_document_package(message.document)
-
- if message.info:
- response["info"] = [
- serialize_document_info(v)
- for v in message.info
- ]
-
- return response, True
-
diff --git a/trustgraph-flow/trustgraph/gateway/mux.py b/trustgraph-flow/trustgraph/gateway/mux.py
deleted file mode 100644
index 23b693ab..00000000
--- a/trustgraph-flow/trustgraph/gateway/mux.py
+++ /dev/null
@@ -1,169 +0,0 @@
-
-import asyncio
-import queue
-from pulsar.schema import JsonSchema
-import uuid
-from aiohttp import web, WSMsgType
-
-from . socket import SocketEndpoint
-from . text_completion import TextCompletionRequestor
-
-MAX_OUTSTANDING_REQUESTS = 15
-WORKER_CLOSE_WAIT = 0.01
-START_REQUEST_WAIT = 0.1
-
-# This buffers requests until task start, so short-lived
-MAX_QUEUE_SIZE = 10
-
-class MuxEndpoint(SocketEndpoint):
-
- def __init__(
- self, pulsar_client, auth,
- services,
- path="/api/v1/socket",
- ):
-
- super(MuxEndpoint, self).__init__(
- endpoint_path=path, auth=auth,
- )
-
- self.services = services
-
- async def start(self):
- pass
-
- async def maybe_tidy_workers(self, workers):
-
- while True:
-
- try:
-
- await asyncio.wait_for(
- asyncio.shield(workers[0]),
- WORKER_CLOSE_WAIT
- )
-
- # worker[0] now stopped
- # FIXME: Delete reference???
-
- workers.pop(0)
-
- if len(workers) == 0:
- break
-
- # Loop iterates to try the next worker
-
- except TimeoutError:
- # worker[0] still running, move on
- break
-
- async def start_request_task(self, ws, id, svc, request, workers):
-
- if svc not in self.services:
- await ws.send_json({"id": id, "error": "Service not recognised"})
- return
-
- requestor = self.services[svc]
-
- async def responder(resp, fin):
- await ws.send_json({
- "id": id,
- "response": resp,
- "complete": fin,
- })
-
- # Wait for outstanding requests to go below MAX_OUTSTANDING_REQUESTS
- while len(workers) > MAX_OUTSTANDING_REQUESTS:
-
- # Fixes deadlock
- # FIXME: Put it in its own loop
- await asyncio.sleep(START_REQUEST_WAIT)
-
- await self.maybe_tidy_workers(workers)
-
- worker = asyncio.create_task(
- requestor.process(request, responder)
- )
-
- workers.append(worker)
-
- async def async_thread(self, ws, running, q):
-
- # Worker threads, servicing
- workers = []
-
- while running.get():
-
- try:
-
- if len(workers) > 0:
- await self.maybe_tidy_workers(workers)
-
- # Get next request on queue
- id, svc, request = await asyncio.wait_for(q.get(), 1)
-
- except TimeoutError:
- continue
-
- except Exception as e:
- # This is an internal working error, may not be recoverable
- print("Exception:", e)
- await ws.send_json({"id": id, "error": str(e)})
- break
-
- try:
- print(id, svc, request)
- await self.start_request_task(ws, id, svc, request, workers)
-
- except Exception as e:
- print("Exception2:", e)
- await ws.send_json({"error": str(e)})
-
- running.stop()
-
- async def listener(self, ws, running):
-
- # The outstanding request queue, max size is MAX_QUEUE_SIZE
- q = asyncio.Queue(maxsize=MAX_QUEUE_SIZE)
-
- async_task = asyncio.create_task(self.async_thread(
- ws, running, q
- ))
-
- async for msg in ws:
-
- # On error, finish
- if msg.type == WSMsgType.TEXT:
-
- try:
-
- data = msg.json()
-
- if data["service"] not in self.services:
- raise RuntimeError("Bad service")
-
- if "request" not in data:
- raise RuntimeError("Bad message")
-
- if "id" not in data:
- raise RuntimeError("Bad message")
-
- await q.put(
- (data["id"], data["service"], data["request"])
- )
-
- except Exception as e:
-
- await ws.send_json({"error": str(e)})
- continue
-
- elif msg.type == WSMsgType.ERROR:
- break
- elif msg.type == WSMsgType.CLOSE:
- break
- else:
- break
-
- running.stop()
-
- await async_task
diff --git a/trustgraph-flow/trustgraph/gateway/service.py b/trustgraph-flow/trustgraph/gateway/service.py
index d3122e3b..97406422 100755
--- a/trustgraph-flow/trustgraph/gateway/service.py
+++ b/trustgraph-flow/trustgraph/gateway/service.py
@@ -3,54 +3,22 @@ API gateway. Offers HTTP services which are translated to interaction on the
Pulsar bus.
"""
-module = ".".join(__name__.split(".")[1:-1])
-
-# FIXME: Subscribes to Pulsar unnecessarily, should only do it when there
-# are active listeners
-
-# FIXME: Connection errors in publishers / subscribers cause those threads
-# to fail and are not failed or retried
-
import asyncio
import argparse
from aiohttp import web
import logging
import os
-import base64
-
-import pulsar
-from pulsar.schema import JsonSchema
-from prometheus_client import start_http_server
from .. log_level import LogLevel
-from . serialize import to_subgraph
-from . running import Running
-from . text_completion import TextCompletionRequestor
-from . prompt import PromptRequestor
-from . graph_rag import GraphRagRequestor
-from . document_rag import DocumentRagRequestor
-from . triples_query import TriplesQueryRequestor
-from . graph_embeddings_query import GraphEmbeddingsQueryRequestor
-from . embeddings import EmbeddingsRequestor
-from . encyclopedia import EncyclopediaRequestor
-from . agent import AgentRequestor
-from . dbpedia import DbpediaRequestor
-from . internet_search import InternetSearchRequestor
-from . librarian import LibrarianRequestor
-from . triples_stream import TriplesStreamEndpoint
-from . graph_embeddings_stream import GraphEmbeddingsStreamEndpoint
-from . document_embeddings_stream import DocumentEmbeddingsStreamEndpoint
-from . triples_load import TriplesLoadEndpoint
-from . graph_embeddings_load import GraphEmbeddingsLoadEndpoint
-from . document_embeddings_load import DocumentEmbeddingsLoadEndpoint
-from . mux import MuxEndpoint
-from . document_load import DocumentLoadSender
-from . text_load import TextLoadSender
-from . metrics import MetricsEndpoint
-
-from . endpoint import ServiceEndpoint
from . auth import Authenticator
+from . config.receiver import ConfigReceiver
+from . dispatch.manager import DispatcherManager
+
+from . endpoint.manager import EndpointManager
+
+import pulsar
+from prometheus_client import start_http_server
logger = logging.getLogger("api")
logger.setLevel(logging.INFO)
@@ -66,17 +34,13 @@ class Api:
def __init__(self, **config):
- self.app = web.Application(
- middlewares=[],
- client_max_size=256 * 1024 * 1024
- )
-
self.port = int(config.get("port", default_port))
self.timeout = int(config.get("timeout", default_timeout))
self.pulsar_host = config.get("pulsar_host", default_pulsar_host)
self.pulsar_api_key = config.get(
"pulsar_api_key", default_pulsar_api_key
)
+
self.pulsar_listener = config.get("pulsar_listener", None)
if self.pulsar_api_key:
@@ -104,165 +68,42 @@ class Api:
else:
self.auth = Authenticator(allow_all=True)
- self.services = {
- "text-completion": TextCompletionRequestor(
- pulsar_client=self.pulsar_client, timeout=self.timeout,
- auth = self.auth,
- ),
- "prompt": PromptRequestor(
- pulsar_client=self.pulsar_client, timeout=self.timeout,
- auth = self.auth,
- ),
- "graph-rag": GraphRagRequestor(
- pulsar_client=self.pulsar_client, timeout=self.timeout,
- auth = self.auth,
- ),
- "document-rag": DocumentRagRequestor(
- pulsar_client=self.pulsar_client, timeout=self.timeout,
- auth = self.auth,
- ),
- "triples-query": TriplesQueryRequestor(
- pulsar_client=self.pulsar_client, timeout=self.timeout,
- auth = self.auth,
- ),
- "graph-embeddings-query": GraphEmbeddingsQueryRequestor(
- pulsar_client=self.pulsar_client, timeout=self.timeout,
- auth = self.auth,
- ),
- "embeddings": EmbeddingsRequestor(
- pulsar_client=self.pulsar_client, timeout=self.timeout,
- auth = self.auth,
- ),
- "agent": AgentRequestor(
- pulsar_client=self.pulsar_client, timeout=self.timeout,
- auth = self.auth,
- ),
- "librarian": LibrarianRequestor(
- pulsar_client=self.pulsar_client, timeout=self.timeout,
- auth = self.auth,
- ),
- "encyclopedia": EncyclopediaRequestor(
- pulsar_client=self.pulsar_client, timeout=self.timeout,
- auth = self.auth,
- ),
- "dbpedia": DbpediaRequestor(
- pulsar_client=self.pulsar_client, timeout=self.timeout,
- auth = self.auth,
- ),
- "internet-search": InternetSearchRequestor(
- pulsar_client=self.pulsar_client, timeout=self.timeout,
- auth = self.auth,
- ),
- "document-load": DocumentLoadSender(
- pulsar_client=self.pulsar_client,
- ),
- "text-load": TextLoadSender(
- pulsar_client=self.pulsar_client,
- ),
- }
+ self.config_receiver = ConfigReceiver(self.pulsar_client)
+
+ self.dispatcher_manager = DispatcherManager(
+ pulsar_client = self.pulsar_client,
+ config_receiver = self.config_receiver,
+ )
+
+ self.endpoint_manager = EndpointManager(
+ dispatcher_manager = self.dispatcher_manager,
+ auth = self.auth,
+ prometheus_url = self.prometheus_url,
+ timeout = self.timeout,
+
+ )
self.endpoints = [
- ServiceEndpoint(
- endpoint_path = "/api/v1/text-completion", auth=self.auth,
- requestor = self.services["text-completion"],
- ),
- ServiceEndpoint(
- endpoint_path = "/api/v1/prompt", auth=self.auth,
- requestor = self.services["prompt"],
- ),
- ServiceEndpoint(
- endpoint_path = "/api/v1/graph-rag", auth=self.auth,
- requestor = self.services["graph-rag"],
- ),
- ServiceEndpoint(
- endpoint_path = "/api/v1/document-rag", auth=self.auth,
- requestor = self.services["document-rag"],
- ),
- ServiceEndpoint(
- endpoint_path = "/api/v1/triples-query", auth=self.auth,
- requestor = self.services["triples-query"],
- ),
- ServiceEndpoint(
- endpoint_path = "/api/v1/graph-embeddings-query",
- auth=self.auth,
- requestor = self.services["graph-embeddings-query"],
- ),
- ServiceEndpoint(
- endpoint_path = "/api/v1/embeddings", auth=self.auth,
- requestor = self.services["embeddings"],
- ),
- ServiceEndpoint(
- endpoint_path = "/api/v1/agent", auth=self.auth,
- requestor = self.services["agent"],
- ),
- ServiceEndpoint(
- endpoint_path = "/api/v1/librarian", auth=self.auth,
- requestor = self.services["librarian"],
- ),
- ServiceEndpoint(
- endpoint_path = "/api/v1/encyclopedia", auth=self.auth,
- requestor = self.services["encyclopedia"],
- ),
- ServiceEndpoint(
- endpoint_path = "/api/v1/dbpedia", auth=self.auth,
- requestor = self.services["dbpedia"],
- ),
- ServiceEndpoint(
- endpoint_path = "/api/v1/internet-search", auth=self.auth,
- requestor = self.services["internet-search"],
- ),
- ServiceEndpoint(
- endpoint_path = "/api/v1/load/document", auth=self.auth,
- requestor = self.services["document-load"],
- ),
- ServiceEndpoint(
- endpoint_path = "/api/v1/load/text", auth=self.auth,
- requestor = self.services["text-load"],
- ),
- TriplesStreamEndpoint(
- pulsar_client=self.pulsar_client,
- auth = self.auth,
- ),
- GraphEmbeddingsStreamEndpoint(
- pulsar_client=self.pulsar_client,
- auth = self.auth,
- ),
- DocumentEmbeddingsStreamEndpoint(
- pulsar_client=self.pulsar_client,
- auth = self.auth,
- ),
- TriplesLoadEndpoint(
- pulsar_client=self.pulsar_client,
- auth = self.auth,
- ),
- GraphEmbeddingsLoadEndpoint(
- pulsar_client=self.pulsar_client,
- auth = self.auth,
- ),
- DocumentEmbeddingsLoadEndpoint(
- pulsar_client=self.pulsar_client,
- auth = self.auth,
- ),
- MuxEndpoint(
- pulsar_client=self.pulsar_client,
- auth = self.auth,
- services = self.services,
- ),
- MetricsEndpoint(
- endpoint_path = "/api/v1/metrics",
- prometheus_url = self.prometheus_url,
- auth = self.auth,
- ),
]
+ async def app_factory(self):
+
+ self.app = web.Application(
+ middlewares=[],
+ client_max_size=256 * 1024 * 1024
+ )
+
+ await self.config_receiver.start()
+
for ep in self.endpoints:
ep.add_routes(self.app)
- async def app_factory(self):
-
for ep in self.endpoints:
await ep.start()
+ self.endpoint_manager.add_routes(self.app)
+ await self.endpoint_manager.start()
+
return self.app
def run(self):
diff --git a/trustgraph-flow/trustgraph/gateway/socket.py b/trustgraph-flow/trustgraph/gateway/socket.py
deleted file mode 100644
index c32a28af..00000000
--- a/trustgraph-flow/trustgraph/gateway/socket.py
+++ /dev/null
@@ -1,72 +0,0 @@
-
-import asyncio
-from aiohttp import web, WSMsgType
-import logging
-
-from . running import Running
-
-logger = logging.getLogger("socket")
-logger.setLevel(logging.INFO)
-
-class SocketEndpoint:
-
- def __init__(
- self, endpoint_path, auth,
- ):
-
- self.path = endpoint_path
- self.auth = auth
- self.operation = "socket"
-
- async def listener(self, ws, running):
-
- async for msg in ws:
- # On error, finish
- if msg.type == WSMsgType.TEXT:
- # Ignore incoming message
- continue
- elif msg.type == WSMsgType.BINARY:
- # Ignore incoming message
- continue
- else:
- break
-
- running.stop()
-
- async def handle(self, request):
-
- try:
- token = request.query['token']
- except:
- token = ""
-
- if not self.auth.permitted(token, self.operation):
- return web.HTTPUnauthorized()
-
- running = Running()
-
- # 50MB max message size
- ws = web.WebSocketResponse(max_msg_size=52428800)
-
- await ws.prepare(request)
-
- try:
- await self.listener(ws, running)
- except Exception as e:
- print("Socket exception:", e, flush=True)
-
- running.stop()
-
- await ws.close()
-
- return ws
-
- async def start(self):
- pass
-
- def add_routes(self, app):
-
- app.add_routes([
- web.get(self.path, self.handle),
- ])
-
diff --git a/trustgraph-flow/trustgraph/gateway/triples_load.py b/trustgraph-flow/trustgraph/gateway/triples_load.py
deleted file mode 100644
index bc69975e..00000000
--- a/trustgraph-flow/trustgraph/gateway/triples_load.py
+++ /dev/null
@@ -1,57 +0,0 @@
-
-import asyncio
-from pulsar.schema import JsonSchema
-import uuid
-from aiohttp import WSMsgType
-
-from .. schema import Metadata
-from .. schema import Triples
-from .. schema import triples_store_queue
-from .. base import Publisher
-
-from . socket import SocketEndpoint
-from . serialize import to_subgraph
-
-class TriplesLoadEndpoint(SocketEndpoint):
-
- def __init__(self, pulsar_client, auth, path="/api/v1/load/triples"):
-
- super(TriplesLoadEndpoint, self).__init__(
- endpoint_path=path, auth=auth,
- )
-
- self.pulsar_client=pulsar_client
-
- self.publisher = Publisher(
- self.pulsar_client, triples_store_queue,
- schema=JsonSchema(Triples)
- )
-
- async def start(self):
-
- self.publisher.start()
-
- async def listener(self, ws, running):
-
- async for msg in ws:
- # On error, finish
- if msg.type == WSMsgType.ERROR:
- break
- else:
-
- data = msg.json()
-
- elt = Triples(
- metadata=Metadata(
- id=data["metadata"]["id"],
- metadata=to_subgraph(data["metadata"]["metadata"]),
- user=data["metadata"]["user"],
- collection=data["metadata"]["collection"],
- ),
- triples=to_subgraph(data["triples"]),
- )
-
- self.publisher.send(None, elt)
-
-
- running.stop()
diff --git a/trustgraph-flow/trustgraph/gateway/triples_stream.py b/trustgraph-flow/trustgraph/gateway/triples_stream.py
deleted file mode 100644
index a5d5ad0a..00000000
--- a/trustgraph-flow/trustgraph/gateway/triples_stream.py
+++ /dev/null
@@ -1,68 +0,0 @@
-
-import asyncio
-import queue
-from pulsar.schema import JsonSchema
-import uuid
-
-from .. schema import Triples
-from .. schema import triples_store_queue
-from .. base import Subscriber
-
-from . socket import SocketEndpoint
-from . serialize import serialize_triples
-
-class TriplesStreamEndpoint(SocketEndpoint):
-
- def __init__(self, pulsar_client, auth, path="/api/v1/stream/triples"):
-
- super(TriplesStreamEndpoint, self).__init__(
- endpoint_path=path, auth=auth,
- )
-
- self.pulsar_client=pulsar_client
-
- self.subscriber = Subscriber(
- self.pulsar_client, triples_store_queue,
- "api-gateway", "api-gateway",
- schema=JsonSchema(Triples)
- )
-
- async def listener(self, ws, running):
-
- worker = asyncio.create_task(
- self.async_thread(ws, running)
- )
-
- await super(TriplesStreamEndpoint, self).listener(ws, running)
-
- await worker
-
- async def start(self):
-
- self.subscriber.start()
-
- async def async_thread(self, ws, running):
-
- id = str(uuid.uuid4())
-
- q = self.subscriber.subscribe_all(id)
-
- while running.get():
- try:
- resp = await asyncio.to_thread(q.get, timeout=0.5)
- await ws.send_json(serialize_triples(resp))
-
- except TimeoutError:
- continue
-
- except queue.Empty:
- continue
-
- except Exception as e:
- print(f"Exception: {str(e)}", flush=True)
- break
-
- self.subscriber.unsubscribe_all(id)
-
- running.stop()
-
diff --git a/trustgraph-flow/trustgraph/gateway/dbpedia.py b/trustgraph-flow/trustgraph/gateway/unused/dbpedia.py
similarity index 100%
rename from trustgraph-flow/trustgraph/gateway/dbpedia.py
rename to trustgraph-flow/trustgraph/gateway/unused/dbpedia.py
diff --git a/trustgraph-flow/trustgraph/gateway/encyclopedia.py b/trustgraph-flow/trustgraph/gateway/unused/encyclopedia.py
similarity index 100%
rename from trustgraph-flow/trustgraph/gateway/encyclopedia.py
rename to trustgraph-flow/trustgraph/gateway/unused/encyclopedia.py
diff --git a/trustgraph-flow/trustgraph/gateway/internet_search.py b/trustgraph-flow/trustgraph/gateway/unused/internet_search.py
similarity index 100%
rename from trustgraph-flow/trustgraph/gateway/internet_search.py
rename to trustgraph-flow/trustgraph/gateway/unused/internet_search.py
diff --git a/trustgraph-flow/trustgraph/graph_rag.py b/trustgraph-flow/trustgraph/graph_rag.py
deleted file mode 100644
index 6a4e11c5..00000000
--- a/trustgraph-flow/trustgraph/graph_rag.py
+++ /dev/null
@@ -1,295 +0,0 @@
-
-from . clients.graph_embeddings_client import GraphEmbeddingsClient
-from . clients.triples_query_client import TriplesQueryClient
-from . clients.embeddings_client import EmbeddingsClient
-from . clients.prompt_client import PromptClient
-
-from . schema import GraphEmbeddingsRequest, GraphEmbeddingsResponse
-from . schema import TriplesQueryRequest, TriplesQueryResponse
-from . schema import prompt_request_queue
-from . schema import prompt_response_queue
-from . schema import embeddings_request_queue
-from . schema import embeddings_response_queue
-from . schema import graph_embeddings_request_queue
-from . schema import graph_embeddings_response_queue
-from . schema import triples_request_queue
-from . schema import triples_response_queue
-
-LABEL="http://www.w3.org/2000/01/rdf-schema#label"
-DEFINITION="http://www.w3.org/2004/02/skos/core#definition"
-
-class Query:
-
- def __init__(
- self, rag, user, collection, verbose,
- entity_limit=50, triple_limit=30, max_subgraph_size=1000,
- max_path_length=2,
- ):
- self.rag = rag
- self.user = user
- self.collection = collection
- self.verbose = verbose
- self.entity_limit = entity_limit
- self.triple_limit = triple_limit
- self.max_subgraph_size = max_subgraph_size
- self.max_path_length = max_path_length
-
- def get_vector(self, query):
-
- if self.verbose:
- print("Compute embeddings...", flush=True)
-
- qembeds = self.rag.embeddings.request(query)
-
- if self.verbose:
- print("Done.", flush=True)
-
- return qembeds
-
- def get_entities(self, query):
-
- vectors = self.get_vector(query)
-
- if self.verbose:
- print("Get entities...", flush=True)
-
- entities = self.rag.ge_client.request(
- user=self.user, collection=self.collection,
- vectors=vectors, limit=self.entity_limit,
- )
-
- entities = [
- e.value
- for e in entities
- ]
-
- if self.verbose:
- print("Entities:", flush=True)
- for ent in entities:
- print(" ", ent, flush=True)
-
- return entities
-
- def maybe_label(self, e):
-
- if e in self.rag.label_cache:
- return self.rag.label_cache[e]
-
- res = self.rag.triples_client.request(
- user=self.user, collection=self.collection,
- s=e, p=LABEL, o=None, limit=1,
- )
-
- if len(res) == 0:
- self.rag.label_cache[e] = e
- return e
-
- self.rag.label_cache[e] = res[0].o.value
- return self.rag.label_cache[e]
-
- def follow_edges(self, ent, subgraph, path_length):
-
- # Not needed?
- if path_length <= 0:
- return
-
- # Stop spanning around if the subgraph is already maxed out
- if len(subgraph) >= self.max_subgraph_size:
- return
-
- res = self.rag.triples_client.request(
- user=self.user, collection=self.collection,
- s=ent, p=None, o=None,
- limit=self.triple_limit
- )
-
- for triple in res:
- subgraph.add(
- (triple.s.value, triple.p.value, triple.o.value)
- )
- if path_length > 1:
- self.follow_edges(triple.o.value, subgraph, path_length-1)
-
- res = self.rag.triples_client.request(
- user=self.user, collection=self.collection,
- s=None, p=ent, o=None,
- limit=self.triple_limit
- )
-
- for triple in res:
- subgraph.add(
- (triple.s.value, triple.p.value, triple.o.value)
- )
-
- res = self.rag.triples_client.request(
- user=self.user, collection=self.collection,
- s=None, p=None, o=ent,
- limit=self.triple_limit,
- )
-
- for triple in res:
- subgraph.add(
- (triple.s.value, triple.p.value, triple.o.value)
- )
- if path_length > 1:
- self.follow_edges(triple.s.value, subgraph, path_length-1)
-
- def get_subgraph(self, query):
-
- entities = self.get_entities(query)
-
- if self.verbose:
- print("Get subgraph...", flush=True)
-
- subgraph = set()
-
- for ent in entities:
- self.follow_edges(ent, subgraph, self.max_path_length)
-
- subgraph = list(subgraph)
-
- return subgraph
-
- def get_labelgraph(self, query):
-
- subgraph = self.get_subgraph(query)
-
- sg2 = []
-
- for edge in subgraph:
-
- if edge[1] == LABEL:
- continue
-
- s = self.maybe_label(edge[0])
- p = self.maybe_label(edge[1])
- o = self.maybe_label(edge[2])
-
- sg2.append((s, p, o))
-
- sg2 = sg2[0:self.max_subgraph_size]
-
- if self.verbose:
- print("Subgraph:", flush=True)
- for edge in sg2:
- print(" ", str(edge), flush=True)
-
- if self.verbose:
- print("Done.", flush=True)
-
- return sg2
-
-class GraphRag:
-
- def __init__(
- self,
- pulsar_host="pulsar://pulsar:6650",
- pulsar_api_key=None,
- pr_request_queue=None,
- pr_response_queue=None,
- emb_request_queue=None,
- emb_response_queue=None,
- ge_request_queue=None,
- ge_response_queue=None,
- tpl_request_queue=None,
- tpl_response_queue=None,
- verbose=False,
- module="test",
- ):
-
- self.verbose=verbose
-
- if pr_request_queue is None:
- pr_request_queue = prompt_request_queue
-
- if pr_response_queue is None:
- pr_response_queue = prompt_response_queue
-
- if emb_request_queue is None:
- emb_request_queue = embeddings_request_queue
-
- if emb_response_queue is None:
- emb_response_queue = embeddings_response_queue
-
- if ge_request_queue is None:
- ge_request_queue = graph_embeddings_request_queue
-
- if ge_response_queue is None:
- ge_response_queue = graph_embeddings_response_queue
-
- if tpl_request_queue is None:
- tpl_request_queue = triples_request_queue
-
- if tpl_response_queue is None:
- tpl_response_queue = triples_response_queue
-
- if self.verbose:
- print("Initialising...", flush=True)
-
- self.ge_client = GraphEmbeddingsClient(
- pulsar_host=pulsar_host,
- pulsar_api_key=pulsar_api_key,
- subscriber=module + "-ge",
- input_queue=ge_request_queue,
- output_queue=ge_response_queue,
- )
-
- self.triples_client = TriplesQueryClient(
- pulsar_host=pulsar_host,
- pulsar_api_key=pulsar_api_key,
- subscriber=module + "-tpl",
- input_queue=tpl_request_queue,
- output_queue=tpl_response_queue
- )
-
- self.embeddings = EmbeddingsClient(
- pulsar_host=pulsar_host,
- pulsar_api_key=pulsar_api_key,
- input_queue=emb_request_queue,
- output_queue=emb_response_queue,
- subscriber=module + "-emb",
- )
-
- self.label_cache = {}
-
- self.prompt = PromptClient(
- pulsar_host=pulsar_host,
- pulsar_api_key=pulsar_api_key,
- input_queue=pr_request_queue,
- output_queue=pr_response_queue,
- subscriber=module + "-prompt",
- )
-
- if self.verbose:
- print("Initialised", flush=True)
-
- def query(
- self, query, user="trustgraph", collection="default",
- entity_limit=50, triple_limit=30, max_subgraph_size=1000,
- max_path_length=2,
- ):
-
- if self.verbose:
- print("Construct prompt...", flush=True)
-
- q = Query(
- rag=self, user=user, collection=collection, verbose=self.verbose,
- entity_limit=entity_limit, triple_limit=triple_limit,
- max_subgraph_size=max_subgraph_size,
- max_path_length=max_path_length,
- )
-
- kg = q.get_labelgraph(query)
-
- if self.verbose:
- print("Invoke LLM...", flush=True)
- print(kg)
- print(query)
-
- resp = self.prompt.request_kg_prompt(query, kg)
-
- if self.verbose:
- print("Done", flush=True)
-
- return resp
-
diff --git a/trustgraph-flow/trustgraph/librarian/blob_store.py b/trustgraph-flow/trustgraph/librarian/blob_store.py
index 5cffef18..3368f57e 100644
--- a/trustgraph-flow/trustgraph/librarian/blob_store.py
+++ b/trustgraph-flow/trustgraph/librarian/blob_store.py
@@ -37,7 +37,7 @@ class BlobStore:
else:
print("Bucket", self.bucket_name, "already exists", flush=True)
- def add(self, object_id, blob, kind):
+ async def add(self, object_id, blob, kind):
# FIXME: Loop retry
self.minio.put_object(
@@ -49,3 +49,25 @@ class BlobStore:
)
print("Add blob complete", flush=True)
+
+ async def remove(self, object_id):
+
+ # FIXME: Loop retry
+ self.minio.remove_object(
+ bucket_name = self.bucket_name,
+ object_name = "doc/" + str(object_id),
+ )
+
+ print("Remove blob complete", flush=True)
+
+
+ async def get(self, object_id):
+
+ # FIXME: Loop retry
+ resp = self.minio.get_object(
+ bucket_name = self.bucket_name,
+ object_name = "doc/" + str(object_id),
+ )
+
+ return resp.read()
+
diff --git a/trustgraph-flow/trustgraph/librarian/librarian.py b/trustgraph-flow/trustgraph/librarian/librarian.py
index 9bccc37a..89750c42 100644
--- a/trustgraph-flow/trustgraph/librarian/librarian.py
+++ b/trustgraph-flow/trustgraph/librarian/librarian.py
@@ -1,8 +1,10 @@
+
from .. schema import LibrarianRequest, LibrarianResponse, Error, Triple
from .. knowledge import hash
from .. exceptions import RequestError
-from . table_store import TableStore
+from .. tables.library import LibraryTableStore
from . blob_store import BlobStore
+import base64
import uuid
@@ -12,77 +14,255 @@ class Librarian:
self,
cassandra_host, cassandra_user, cassandra_password,
minio_host, minio_access_key, minio_secret_key,
- bucket_name, keyspace, load_document, load_text,
+ bucket_name, keyspace, load_document,
):
self.blob_store = BlobStore(
minio_host, minio_access_key, minio_secret_key, bucket_name
)
- self.table_store = TableStore(
+ self.table_store = LibraryTableStore(
cassandra_host, cassandra_user, cassandra_password, keyspace
)
self.load_document = load_document
- self.load_text = load_text
- async def add(self, document):
+ async def add_document(self, request):
- if document.kind not in (
+ if request.document_metadata.kind not in (
"text/plain", "application/pdf"
):
- raise RequestError("Invalid document kind: " + document.kind)
+ raise RequestError(
+ "Invalid document kind: " + request.document_metadata.kind
+ )
- # Create object ID as a hash of the document
- object_id = uuid.UUID(hash(document.document))
+ if await self.table_store.document_exists(
+ request.document_metadata.user,
+ request.document_metadata.id
+ ):
+ raise RuntimeError("Document already exists")
- self.blob_store.add(object_id, document.document, document.kind)
+ # Create object ID for blob
+ object_id = uuid.uuid4()
- self.table_store.add(object_id, document)
+ print("Add blob...")
- if document.kind == "application/pdf":
- await self.load_document(document)
- elif document.kind == "text/plain":
- await self.load_text(document)
+ await self.blob_store.add(
+ object_id, base64.b64decode(request.content),
+ request.document_metadata.kind
+ )
+
+ print("Add table...")
+
+ await self.table_store.add_document(
+ request.document_metadata, object_id
+ )
print("Add complete", flush=True)
return LibrarianResponse(
error = None,
- document = None,
- info = None,
+ document_metadata = None,
+ content = None,
+ document_metadatas = None,
+ processing_metadatas = None,
)
- async def list(self, user, collection):
+ async def remove_document(self, request):
- print("list")
+ print("Removing doc...")
- info = self.table_store.list(user, collection)
+ if not await self.table_store.document_exists(
+ request.user,
+ request.document_id,
+ ):
+ raise RuntimeError("Document does not exist")
- print(">>", info)
+ object_id = await self.table_store.get_document_object_id(
+ request.user,
+ request.document_id
+ )
+
+ # Remove blob...
+ await self.blob_store.remove(object_id)
+
+ # Remove doc table row
+ await self.table_store.remove_document(
+ request.user,
+ request.document_id
+ )
+
+ print("Remove complete", flush=True)
return LibrarianResponse(
error = None,
- document = None,
- info = info,
+ document_metadata = None,
+ content = None,
+ document_metadatas = None,
+ processing_metadatas = None,
)
- def handle_triples(self, m):
- self.table_store.add_triples(m)
+ async def update_document(self, request):
- def handle_graph_embeddings(self, m):
- self.table_store.add_graph_embeddings(m)
+ print("Updating doc...")
- def handle_document_embeddings(self, m):
- self.table_store.add_document_embeddings(m)
-
+ # You can't update the document ID, user or kind.
- def handle_triples(self, m):
- self.table_store.add_triples(m)
+ if not await self.table_store.document_exists(
+ request.document_metadata.user,
+ request.document_metadata.id
+ ):
+ raise RuntimeError("Document does not exist")
- def handle_graph_embeddings(self, m):
- self.table_store.add_graph_embeddings(m)
+ await self.table_store.update_document(request.document_metadata)
- def handle_document_embeddings(self, m):
- self.table_store.add_document_embeddings(m)
+ print("Update complete", flush=True)
+
+ return LibrarianResponse(
+ error = None,
+ document_metadata = None,
+ content = None,
+ document_metadatas = None,
+ processing_metadatas = None,
+ )
+
+ async def get_document_metadata(self, request):
+
+ print("Get doc...")
+
+ doc = await self.table_store.get_document(
+ request.user,
+ request.document_id
+ )
+
+ print("Get complete", flush=True)
+
+ return LibrarianResponse(
+ error = None,
+ document_metadata = doc,
+ content = None,
+ document_metadatas = None,
+ processing_metadatas = None,
+ )
+
+ async def get_document_content(self, request):
+
+ print("Get doc content...")
+
+ object_id = await self.table_store.get_document_object_id(
+ request.user,
+ request.document_id
+ )
+
+ content = await self.blob_store.get(
+ object_id
+ )
+
+ print("Get complete", flush=True)
+
+ return LibrarianResponse(
+ error = None,
+ document_metadata = None,
+ content = base64.b64encode(content),
+ document_metadatas = None,
+ processing_metadatas = None,
+ )
+
+ async def add_processing(self, request):
+
+ print("Add processing")
+
+ if await self.table_store.processing_exists(
+ request.processing_metadata.user,
+ request.processing_metadata.id
+ ):
+ raise RuntimeError("Processing already exists")
+
+ doc = await self.table_store.get_document(
+ request.processing_metadata.user,
+ request.processing_metadata.document_id
+ )
+
+ object_id = await self.table_store.get_document_object_id(
+ request.processing_metadata.user,
+ request.processing_metadata.document_id
+ )
+
+ content = await self.blob_store.get(
+ object_id
+ )
+
+ print("Got content")
+
+ print("Add processing...")
+
+ await self.table_store.add_processing(request.processing_metadata)
+
+ print("Invoke document processing...")
+
+ await self.load_document(
+ document = doc,
+ processing = request.processing_metadata,
+ content = content,
+ )
+
+ print("Add complete", flush=True)
+
+ return LibrarianResponse(
+ error = None,
+ document_metadata = None,
+ content = None,
+ document_metadatas = None,
+ processing_metadatas = None,
+ )
+
+ async def remove_processing(self, request):
+
+ print("Removing processing...")
+
+ if not await self.table_store.processing_exists(
+ request.user,
+ request.processing_id,
+ ):
+ raise RuntimeError("Processing object does not exist")
+
+ # Remove doc table row
+ await self.table_store.remove_processing(
+ request.user,
+ request.processing_id
+ )
+
+ print("Remove complete", flush=True)
+
+ return LibrarianResponse(
+ error = None,
+ document_metadata = None,
+ content = None,
+ document_metadatas = None,
+ processing_metadatas = None,
+ )
+
+ async def list_documents(self, request):
+
+ docs = await self.table_store.list_documents(request.user)
+
+ return LibrarianResponse(
+ error = None,
+ document_metadata = None,
+ content = None,
+ document_metadatas = docs,
+ processing_metadatas = None,
+ )
+
+ async def list_processing(self, request):
+
+ procs = await self.table_store.list_processing(request.user)
+
+ return LibrarianResponse(
+ error = None,
+ document_metadata = None,
+ content = None,
+ document_metadatas = None,
+ processing_metadatas = procs,
+ )
diff --git a/trustgraph-flow/trustgraph/librarian/service.py b/trustgraph-flow/trustgraph/librarian/service.py
index b42123a5..d1ce4805 100755
--- a/trustgraph-flow/trustgraph/librarian/service.py
+++ b/trustgraph-flow/trustgraph/librarian/service.py
@@ -5,41 +5,27 @@ Librarian service, manages documents in collections
from functools import partial
import asyncio
-import threading
-import queue
import base64
+import json
-from pulsar.schema import JsonSchema
+from .. base import AsyncProcessor, Consumer, Producer, Publisher, Subscriber
+from .. base import ConsumerMetrics, ProducerMetrics
from .. schema import LibrarianRequest, LibrarianResponse, Error
from .. schema import librarian_request_queue, librarian_response_queue
-from .. schema import GraphEmbeddings
-from .. schema import graph_embeddings_store_queue
-from .. schema import Triples
-from .. schema import triples_store_queue
-from .. schema import DocumentEmbeddings
-from .. schema import document_embeddings_store_queue
-
from .. schema import Document, Metadata
-from .. schema import document_ingest_queue
from .. schema import TextDocument, Metadata
-from .. schema import text_ingest_queue
-from .. base import Publisher
-from .. base import Subscriber
-
-from .. log_level import LogLevel
-from .. base import ConsumerProducer
from .. exceptions import RequestError
from . librarian import Librarian
-module = ".".join(__name__.split(".")[1:-1])
+default_ident = "librarian"
+
+default_librarian_request_queue = librarian_request_queue
+default_librarian_response_queue = librarian_response_queue
-default_input_queue = librarian_request_queue
-default_output_queue = librarian_response_queue
-default_subscriber = module
default_minio_host = "minio:9000"
default_minio_access_key = "minioadmin"
default_minio_secret_key = "minioadmin"
@@ -50,15 +36,21 @@ bucket_name = "library"
# FIXME: How to ensure this doesn't conflict with other usage?
keyspace = "librarian"
-class Processor(ConsumerProducer):
+class Processor(AsyncProcessor):
def __init__(self, **params):
- self.running = True
+ id = params.get("id")
- input_queue = params.get("input_queue", default_input_queue)
- output_queue = params.get("output_queue", default_output_queue)
- subscriber = params.get("subscriber", default_subscriber)
+# self.running = True
+
+ librarian_request_queue = params.get(
+ "librarian_request_queue", default_librarian_request_queue
+ )
+
+ librarian_response_queue = params.get(
+ "librarian_response_queue", default_librarian_response_queue
+ )
minio_host = params.get("minio_host", default_minio_host)
minio_access_key = params.get(
@@ -74,19 +66,10 @@ class Processor(ConsumerProducer):
cassandra_user = params.get("cassandra_user")
cassandra_password = params.get("cassandra_password")
- triples_queue = params.get("triples_queue")
- graph_embeddings_queue = params.get("graph_embeddings_queue")
- document_embeddings_queue = params.get("document_embeddings_queue")
- document_load_queue = params.get("document_load_queue")
- text_load_queue = params.get("text_load_queue")
-
super(Processor, self).__init__(
**params | {
- "input_queue": input_queue,
- "output_queue": output_queue,
- "subscriber": subscriber,
- "input_schema": LibrarianRequest,
- "output_schema": LibrarianResponse,
+ "librarian_request_queue": librarian_request_queue,
+ "librarian_response_queue": librarian_response_queue,
"minio_host": minio_host,
"minio_access_key": minio_access_key,
"cassandra_host": cassandra_host,
@@ -94,38 +77,30 @@ class Processor(ConsumerProducer):
}
)
- self.document_load = Publisher(
- self.client, document_load_queue, JsonSchema(Document),
+ librarian_request_metrics = ConsumerMetrics(
+ processor = self.id, flow = None, name = "librarian-request"
)
- self.text_load = Publisher(
- self.client, text_load_queue, JsonSchema(TextDocument),
+ librarian_response_metrics = ProducerMetrics(
+ processor = self.id, flow = None, name = "librarian-response"
)
- self.triples_brk = Subscriber(
- self.client, triples_store_queue,
- "librarian", "librarian",
- schema=JsonSchema(Triples),
- )
- self.graph_embeddings_brk = Subscriber(
- self.client, graph_embeddings_store_queue,
- "librarian", "librarian",
- schema=JsonSchema(GraphEmbeddings),
- )
- self.document_embeddings_brk = Subscriber(
- self.client, document_embeddings_store_queue,
- "librarian", "librarian",
- schema=JsonSchema(DocumentEmbeddings),
+ self.librarian_request_consumer = Consumer(
+ taskgroup = self.taskgroup,
+ client = self.pulsar_client,
+ flow = None,
+ topic = librarian_request_queue,
+ subscriber = id,
+ schema = LibrarianRequest,
+ handler = self.on_librarian_request,
+ metrics = librarian_request_metrics,
)
- self.triples_reader = threading.Thread(
- target=self.receive_triples
- )
- self.graph_embeddings_reader = threading.Thread(
- target=self.receive_graph_embeddings
- )
- self.document_embeddings_reader = threading.Thread(
- target=self.receive_document_embeddings
+ self.librarian_response_producer = Producer(
+ client = self.pulsar_client,
+ topic = librarian_response_queue,
+ schema = LibrarianResponse,
+ metrics = librarian_response_metrics,
)
self.librarian = Librarian(
@@ -138,157 +113,123 @@ class Processor(ConsumerProducer):
bucket_name = bucket_name,
keyspace = keyspace,
load_document = self.load_document,
- load_text = self.load_text,
)
+ self.register_config_handler(self.on_librarian_config)
+
+ self.flows = {}
+
print("Initialised.", flush=True)
async def start(self):
-
- self.document_load.start()
- self.text_load.start()
- self.triples_brk.start()
- self.graph_embeddings_brk.start()
- self.document_embeddings_brk.start()
+ await super(Processor, self).start()
+ await self.librarian_request_consumer.start()
+ await self.librarian_response_producer.start()
- self.triples_sub = self.triples_brk.subscribe_all("x")
- self.graph_embeddings_sub = self.graph_embeddings_brk.subscribe_all("x")
- self.document_embeddings_sub = self.document_embeddings_brk.subscribe_all("x")
+ async def on_librarian_config(self, config, version):
- self.triples_reader.start()
- self.graph_embeddings_reader.start()
- self.document_embeddings_reader.start()
+ print("config version", version)
+
+ if "flows" in config:
+
+ self.flows = {
+ k: json.loads(v)
+ for k, v in config["flows"].items()
+ }
+
+ print(self.flows)
def __del__(self):
- self.running = False
+ pass
- if hasattr(self, "document_load"):
- self.document_load.stop()
- self.document_load.join()
+ async def load_document(self, document, processing, content):
- if hasattr(self, "text_load"):
- self.text_load.stop()
- self.text_load.join()
+ print("Ready for processing...")
- if hasattr(self, "triples_sub"):
- self.triples_sub.unsubscribe_all("x")
+ print(document, processing, len(content))
- if hasattr(self, "graph_embeddings_sub"):
- self.graph_embeddings_sub.unsubscribe_all("x")
+ if processing.flow not in self.flows:
+ raise RuntimeError("Invalid flow ID")
- if hasattr(self, "document_embeddings_sub"):
- self.document_embeddings_sub.unsubscribe_all("x")
+ flow = self.flows[processing.flow]
- if hasattr(self, "triples_brk"):
- self.triples_brk.stop()
- self.triples_brk.join()
+ if document.kind == "text/plain":
+ kind = "text-load"
+ elif document.kind == "application/pdf":
+ kind = "document-load"
+ else:
+ raise RuntimeError("Document with a MIME type I don't know")
- if hasattr(self, "graph_embeddings_brk"):
- self.graph_embeddings_brk.stop()
- self.graph_embeddings_brk.join()
+ q = flow["interfaces"][kind]
- if hasattr(self, "document_embeddings_brk"):
- self.document_embeddings_brk.stop()
- self.document_embeddings_brk.join()
+ if kind == "text-load":
+ doc = TextDocument(
+ metadata = Metadata(
+ id = document.id,
+ metadata = document.metadata,
+ user = processing.user,
+ collection = processing.collection
+ ),
+ text = content,
+ )
+ schema = TextDocument
+ else:
+ doc = Document(
+ metadata = Metadata(
+ id = document.id,
+ metadata = document.metadata,
+ user = processing.user,
+ collection = processing.collection
+ ),
+ data = base64.b64encode(content).decode("utf-8")
- def receive_triples(self):
+ )
+ schema = Document
- while self.running:
- try:
- msg = self.triples_sub.get(timeout=1)
- except queue.Empty:
- continue
+ print(f"Submit on queue {q}...")
- self.librarian.handle_triples(msg)
-
- def receive_graph_embeddings(self):
-
- while self.running:
- try:
- msg = self.graph_embeddings_sub.get(timeout=1)
- except queue.Empty:
- continue
-
- self.librarian.handle_graph_embeddings(msg)
-
- def receive_document_embeddings(self):
-
- while self.running:
- try:
- msg = self.document_embeddings_sub.get(timeout=1)
- except queue.Empty:
- continue
-
- self.librarian.handle_document_embeddings(msg)
-
- async def load_document(self, document):
-
- doc = Document(
- metadata = Metadata(
- id = document.id,
- metadata = document.metadata,
- user = document.user,
- collection = document.collection
- ),
- data = document.document
+ pub = Publisher(
+ self.pulsar_client, q, schema=schema
)
- self.document_load.send(None, doc)
+ await pub.start()
- async def load_text(self, document):
+ # FIXME: Time wait kludge?
+ await asyncio.sleep(1)
- text = base64.b64decode(document.document)
- text = text.decode("utf-8")
+ await pub.send(None, doc)
- doc = TextDocument(
- metadata = Metadata(
- id = document.id,
- metadata = document.metadata,
- user = document.user,
- collection = document.collection
- ),
- text = text,
- )
+ await pub.stop()
- self.text_load.send(None, doc)
+ print("Document submitted")
- def parse_request(self, v):
+ async def process_request(self, v):
if v.operation is None:
raise RequestError("Null operation")
- print("op", v.operation)
+ print("request", v.operation)
- if v.operation == "add":
- if (
- v.document and v.document.id and v.document.metadata and
- v.document.document and v.document.kind
- ):
- return partial(
- self.librarian.add,
- document = v.document,
- )
- else:
- raise RequestError("Invalid call")
+ impls = {
+ "add-document": self.librarian.add_document,
+ "remove-document": self.librarian.remove_document,
+ "update-document": self.librarian.update_document,
+ "get-document-metadata": self.librarian.get_document_metadata,
+ "get-document-content": self.librarian.get_document_content,
+ "add-processing": self.librarian.add_processing,
+ "remove-processing": self.librarian.remove_processing,
+ "list-documents": self.librarian.list_documents,
+ "list-processing": self.librarian.list_processing,
+ }
- if v.operation == "list":
- print("list", v)
- print(v.user)
- if v.user:
- return partial(
- self.librarian.list,
- user = v.user,
- collection = v.collection,
- )
- else:
- print("BROK")
- raise RequestError("Invalid call")
+ if v.operation not in impls:
+ raise RequestError(f"Invalid operation: {v.operation}")
- raise RequestError("Invalid operation: " + v.operation)
+ return await impls[v.operation](v)
- async def handle(self, msg):
+ async def on_librarian_request(self, msg, consumer, flow):
v = msg.value()
@@ -299,20 +240,15 @@ class Processor(ConsumerProducer):
print(f"Handling input {id}...", flush=True)
try:
- func = self.parse_request(v)
- except RequestError as e:
- resp = LibrarianResponse(
- error = Error(
- type = "request-error",
- message = str(e),
- )
+
+ resp = await self.process_request(v)
+
+ await self.librarian_response_producer.send(
+ resp, properties={"id": id}
)
- await self.send(resp, properties={"id": id})
+
return
- try:
- resp = await func()
- print("->", resp)
except RequestError as e:
resp = LibrarianResponse(
error = Error(
@@ -320,31 +256,43 @@ class Processor(ConsumerProducer):
message = str(e),
)
)
- await self.send(resp, properties={"id": id})
+
+ await self.librarian_response_producer.send(
+ resp, properties={"id": id}
+ )
+
return
except Exception as e:
- print("Exception:", e, flush=True)
resp = LibrarianResponse(
error = Error(
- type = "processing-error",
- message = "Unhandled error: " + str(e),
+ type = "unexpected-error",
+ message = str(e),
)
)
- await self.send(resp, properties={"id": id})
+
+ await self.librarian_response_producer.send(
+ resp, properties={"id": id}
+ )
+
return
- print("Send response..!.", flush=True)
-
- await self.send(resp, properties={"id": id})
-
print("Done.", flush=True)
@staticmethod
def add_args(parser):
- ConsumerProducer.add_args(
- parser, default_input_queue, default_subscriber,
- default_output_queue,
+ AsyncProcessor.add_args(parser)
+
+ parser.add_argument(
+ '--librarian-request-queue',
+ default=default_librarian_request_queue,
+ help=f'Config request queue (default: {default_librarian_request_queue})'
+ )
+
+ parser.add_argument(
+ '--librarian-response-queue',
+ default=default_librarian_response_queue,
+ help=f'Config response queue {default_librarian_response_queue}',
)
parser.add_argument(
@@ -385,40 +333,7 @@ class Processor(ConsumerProducer):
help=f'Cassandra password'
)
- parser.add_argument(
- '--triples-queue',
- default=triples_store_queue,
- help=f'Triples queue (default: {triples_store_queue})'
- )
-
- parser.add_argument(
- '--graph-embeddings-queue',
- default=graph_embeddings_store_queue,
- help=f'Graph embeddings queue (default: {triples_store_queue})'
- )
-
- parser.add_argument(
- '--document-embeddings-queue',
- default=document_embeddings_store_queue,
- help='Document embeddings queue '
- f'(default: {document_embeddings_store_queue})'
- )
-
- parser.add_argument(
- '--document-load-queue',
- default=document_ingest_queue,
- help='Document load queue '
- f'(default: {document_ingest_queue})'
- )
-
- parser.add_argument(
- '--text-load-queue',
- default=text_ingest_queue,
- help='Text ingest queue '
- f'(default: {text_ingest_queue})'
- )
-
def run():
- Processor.launch(module, __doc__)
+ Processor.launch(default_ident, __doc__)
diff --git a/trustgraph-flow/trustgraph/metering/counter.py b/trustgraph-flow/trustgraph/metering/counter.py
index 68ddf441..cb57d8af 100644
--- a/trustgraph-flow/trustgraph/metering/counter.py
+++ b/trustgraph-flow/trustgraph/metering/counter.py
@@ -3,23 +3,19 @@ Simple token counter for each LLM response.
"""
from prometheus_client import Counter
-from . pricelist import price_list
+import json
from .. schema import TextCompletionResponse, Error
-from .. schema import text_completion_response_queue
-from .. log_level import LogLevel
-from .. base import Consumer
+from .. base import FlowProcessor, ConsumerSpec
-module = ".".join(__name__.split(".")[1:-1])
+default_ident = "metering"
-default_input_queue = text_completion_response_queue
-default_subscriber = module
-
-
-class Processor(Consumer):
+class Processor(FlowProcessor):
def __init__(self, **params):
+ id = params.get("id", default_ident)
+
if not hasattr(__class__, "input_token_metric"):
__class__.input_token_metric = Counter(
'input_tokens', 'Input token count'
@@ -40,40 +36,61 @@ class Processor(Consumer):
'output_cost', 'Output cost'
)
- input_queue = params.get("input_queue", default_input_queue)
- subscriber = params.get("subscriber", default_subscriber)
-
super(Processor, self).__init__(
**params | {
- "input_queue": input_queue,
- "subscriber": subscriber,
- "input_schema": TextCompletionResponse,
+ "id": id,
}
)
- def get_prices(self, prices, modelname):
- for model in prices["price_list"]:
- if model["model_name"] == modelname:
- return model["input_price"], model["output_price"]
+ self.register_config_handler(self.on_cost_config)
+
+ self.register_specification(
+ ConsumerSpec(
+ name = "input",
+ schema = TextCompletionResponse,
+ handler = self.on_message,
+ )
+ )
+
+ self.prices = {}
+
+ self.config_key = "token-costs"
+
+ # Load token costs from the config service
+ async def on_cost_config(self, config, version):
+
+ print("Loading configuration version", version)
+
+ if self.config_key not in config:
+ print(f"No key {self.config_key} in config", flush=True)
+ return
+
+ config = config[self.config_key]
+
+ self.prices = {
+ k: json.loads(v)
+ for k, v in config.items()
+ }
+
+ def get_prices(self, modelname):
+
+ if modelname in self.prices:
+ model = self.prices[modelname]
+ return model["input_price"], model["output_price"]
return None, None # Return None if model is not found
- async def handle(self, msg):
+ async def on_message(self, msg, consumer, flow):
v = msg.value()
+
modelname = v.model
-
- # Sender-produced ID
- id = msg.properties()["id"]
-
- print(f"Handling response {id}...", flush=True)
-
num_in = v.in_token
num_out = v.out_token
__class__.input_token_metric.inc(num_in)
__class__.output_token_metric.inc(num_out)
- model_input_price, model_output_price = self.get_prices(price_list, modelname)
+ model_input_price, model_output_price = self.get_prices(modelname)
if model_input_price == None:
cost_per_call = f"Model Not Found in Price list"
@@ -92,10 +109,8 @@ class Processor(Consumer):
@staticmethod
def add_args(parser):
- Consumer.add_args(
- parser, default_input_queue, default_subscriber,
- )
+ FlowProcessor.add_args(parser)
def run():
- Processor.launch(module, __doc__)
+ Processor.launch(default_ident, __doc__)
diff --git a/trustgraph-flow/trustgraph/metering/pricelist.py b/trustgraph-flow/trustgraph/metering/pricelist.py
deleted file mode 100644
index e890d0e1..00000000
--- a/trustgraph-flow/trustgraph/metering/pricelist.py
+++ /dev/null
@@ -1,104 +0,0 @@
-price_list = {
- "price_list": [
- {
- "model_name": "mistral.mistral-large-2407-v1:0",
- "input_price": 0.000004,
- "output_price": 0.000012
- },
- {
- "model_name": "meta.llama3-1-405b-instruct-v1:0",
- "input_price": 0.00000532,
- "output_price": 0.000016
- },
- {
- "model_name": "mistral.mixtral-8x7b-instruct-v0:1",
- "input_price": 0.00000045,
- "output_price": 0.0000007
- },
- {
- "model_name": "meta.llama3-1-70b-instruct-v1:0",
- "input_price": 0.00000099,
- "output_price": 0.00000099
- },
- {
- "model_name": "meta.llama3-1-8b-instruct-v1:0",
- "input_price": 0.00000022,
- "output_price": 0.00000022
- },
- {
- "model_name": "anthropic.claude-3-haiku-20240307-v1:0",
- "input_price": 0.00000025,
- "output_price": 0.00000125
- },
- {
- "model_name": "anthropic.claude-3-5-sonnet-20240620-v1:0",
- "input_price": 0.000003,
- "output_price": 0.000015
- },
- {
- "model_name": "cohere.command-r-plus-v1:0",
- "input_price": 0.0000030,
- "output_price": 0.0000150
- },
- {
- "model_name": "ollama",
- "input_price": 0,
- "output_price": 0
- },
- {
- "model_name": "claude-3-haiku-20240307",
- "input_price": 0.00000025,
- "output_price": 0.00000125
- },
- {
- "model_name": "claude-3-5-sonnet-20240620",
- "input_price": 0.000003,
- "output_price": 0.000015
- },
- {
- "model_name": "claude-3-opus-20240229",
- "input_price": 0.000015,
- "output_price": 0.000075
- },
- {
- "model_name": "claude-3-sonnet-20240229",
- "input_price": 0.000003,
- "output_price": 0.000015
- },
- {
- "model_name": "command-r-08-202",
- "input_price": 0.0000025,
- "output_price": 0.000010
- },
- {
- "model_name": "c4ai-aya-23-8b",
- "input_price": 0,
- "output_price": 0
- },
- {
- "model_name": "llama.cpp",
- "input_price": 0,
- "output_price": 0
- },
- {
- "model_name": "gpt-4o",
- "input_price": 0.000005,
- "output_price": 0.000015
- },
- {
- "model_name": "gpt-4o-2024-08-06",
- "input_price": 0.0000025,
- "output_price": 0.000010
- },
- {
- "model_name": "gpt-4o-2024-05-13",
- "input_price": 0.000005,
- "output_price": 0.000015
- },
- {
- "model_name": "gpt-4o-mini",
- "input_price": 0.00000015,
- "output_price": 0.0000006
- },
- ]
-}
\ No newline at end of file
diff --git a/trustgraph-flow/trustgraph/model/prompt/generic/service.py b/trustgraph-flow/trustgraph/model/prompt/generic/service.py
index b143b759..b10da491 100755
--- a/trustgraph-flow/trustgraph/model/prompt/generic/service.py
+++ b/trustgraph-flow/trustgraph/model/prompt/generic/service.py
@@ -27,7 +27,7 @@ from .... clients.llm_client import LlmClient
from . prompts import to_definitions, to_relationships, to_topics
from . prompts import to_kg_query, to_document_query, to_rows
-module = ".".join(__name__.split(".")[1:-1])
+module = "prompt"
default_input_queue = prompt_request_queue
default_output_queue = prompt_response_queue
diff --git a/trustgraph-flow/trustgraph/model/prompt/template/prompt_manager.py b/trustgraph-flow/trustgraph/model/prompt/template/prompt_manager.py
index d8a032ca..c5c32395 100644
--- a/trustgraph-flow/trustgraph/model/prompt/template/prompt_manager.py
+++ b/trustgraph-flow/trustgraph/model/prompt/template/prompt_manager.py
@@ -4,8 +4,6 @@ import json
from jsonschema import validate
import re
-from trustgraph.clients.llm_client import LlmClient
-
class PromptConfiguration:
def __init__(self, system_template, global_terms={}, prompts={}):
self.system_template = system_template
@@ -21,8 +19,7 @@ class Prompt:
class PromptManager:
- def __init__(self, llm, config):
- self.llm = llm
+ def __init__(self, config):
self.config = config
self.terms = config.global_terms
@@ -54,7 +51,9 @@ class PromptManager:
return json.loads(json_str)
- def invoke(self, id, input):
+ async def invoke(self, id, input, llm):
+
+ print("Invoke...", flush=True)
if id not in self.prompts:
raise RuntimeError("ID invalid")
@@ -68,9 +67,7 @@ class PromptManager:
"prompt": self.templates[id].render(terms)
}
- resp = self.llm.request(**prompt)
-
- print(resp, flush=True)
+ resp = await llm(**prompt)
if resp_type == "text":
return resp
@@ -81,13 +78,13 @@ class PromptManager:
try:
obj = self.parse_json(resp)
except:
+ print("Parse fail:", resp, flush=True)
raise RuntimeError("JSON parse fail")
- print(obj, flush=True)
if self.prompts[id].schema:
try:
- print(self.prompts[id].schema)
validate(instance=obj, schema=self.prompts[id].schema)
+ print("Validated", flush=True)
except Exception as e:
raise RuntimeError(f"Schema validation fail: {e}")
diff --git a/trustgraph-flow/trustgraph/model/prompt/template/service.py b/trustgraph-flow/trustgraph/model/prompt/template/service.py
index 58657d7d..67590c1c 100755
--- a/trustgraph-flow/trustgraph/model/prompt/template/service.py
+++ b/trustgraph-flow/trustgraph/model/prompt/template/service.py
@@ -3,6 +3,7 @@
Language service abstracts prompt engineering from LLM.
"""
+import asyncio
import json
import re
@@ -10,153 +11,107 @@ from .... schema import Definition, Relationship, Triple
from .... schema import Topic
from .... schema import PromptRequest, PromptResponse, Error
from .... schema import TextCompletionRequest, TextCompletionResponse
-from .... schema import text_completion_request_queue
-from .... schema import text_completion_response_queue
-from .... schema import prompt_request_queue, prompt_response_queue
-from .... base import ConsumerProducer
-from .... clients.llm_client import LlmClient
+
+from .... base import FlowProcessor
+from .... base import ProducerSpec, ConsumerSpec, TextCompletionClientSpec
from . prompt_manager import PromptConfiguration, Prompt, PromptManager
-module = ".".join(__name__.split(".")[1:-1])
+default_ident = "prompt"
-default_input_queue = prompt_request_queue
-default_output_queue = prompt_response_queue
-default_subscriber = module
-
-class Processor(ConsumerProducer):
+class Processor(FlowProcessor):
def __init__(self, **params):
- prompt_base = {}
+ id = params.get("id")
- # Parsing the prompt information to the prompt configuration
- # structure
- prompt_arg = params.get("prompt", [])
- if prompt_arg:
- for p in prompt_arg:
- toks = p.split("=", 1)
- if len(toks) < 2:
- raise RuntimeError(f"Prompt string not well-formed: {p}")
- prompt_base[toks[0]] = {
- "template": toks[1]
- }
-
- prompt_response_type_arg = params.get("prompt_response_type", [])
- if prompt_response_type_arg:
- for p in prompt_response_type_arg:
- toks = p.split("=", 1)
- if len(toks) < 2:
- raise RuntimeError(f"Response type not well-formed: {p}")
- if toks[0] not in prompt_base:
- raise RuntimeError(f"Response-type, {toks[0]} not known")
- prompt_base[toks[0]]["response_type"] = toks[1]
-
- prompt_schema_arg = params.get("prompt_schema", [])
- if prompt_schema_arg:
- for p in prompt_schema_arg:
- toks = p.split("=", 1)
- if len(toks) < 2:
- raise RuntimeError(f"Schema arg not well-formed: {p}")
- if toks[0] not in prompt_base:
- raise RuntimeError(f"Schema, {toks[0]} not known")
- try:
- prompt_base[toks[0]]["schema"] = json.loads(toks[1])
- except:
- raise RuntimeError(f"Failed to parse JSON schema: {p}")
-
- prompt_term_arg = params.get("prompt_term", [])
- if prompt_term_arg:
- for p in prompt_term_arg:
- toks = p.split("=", 1)
- if len(toks) < 2:
- raise RuntimeError(f"Term arg not well-formed: {p}")
- if toks[0] not in prompt_base:
- raise RuntimeError(f"Term, {toks[0]} not known")
- kvtoks = toks[1].split(":", 1)
- if len(kvtoks) < 2:
- raise RuntimeError(f"Term not well-formed: {toks[1]}")
- k, v = kvtoks
- if "terms" not in prompt_base[toks[0]]:
- prompt_base[toks[0]]["terms"] = {}
- prompt_base[toks[0]]["terms"][k] = v
-
- global_terms = {}
-
- global_term_arg = params.get("global_term", [])
- if global_term_arg:
- for t in global_term_arg:
- toks = t.split("=", 1)
- if len(toks) < 2:
- raise RuntimeError(f"Global term arg not well-formed: {t}")
- global_terms[toks[0]] = toks[1]
-
- print(global_terms)
-
- prompts = {
- k: Prompt(**v)
- for k, v in prompt_base.items()
- }
-
- prompt_configuration = PromptConfiguration(
- system_template = params.get("system_prompt", ""),
- global_terms = global_terms,
- prompts = prompts
- )
-
- input_queue = params.get("input_queue", default_input_queue)
- output_queue = params.get("output_queue", default_output_queue)
- subscriber = params.get("subscriber", default_subscriber)
- tc_request_queue = params.get(
- "text_completion_request_queue", text_completion_request_queue
- )
- tc_response_queue = params.get(
- "text_completion_response_queue", text_completion_response_queue
- )
- definition_template = params.get("definition_template")
- relationship_template = params.get("relationship_template")
- topic_template = params.get("topic_template")
- rows_template = params.get("rows_template")
- knowledge_query_template = params.get("knowledge_query_template")
- document_query_template = params.get("document_query_template")
+ # Config key for prompts
+ self.config_key = params.get("config_type", "prompt")
super(Processor, self).__init__(
**params | {
- "input_queue": input_queue,
- "output_queue": output_queue,
- "subscriber": subscriber,
- "input_schema": PromptRequest,
- "output_schema": PromptResponse,
- "text_completion_request_queue": tc_request_queue,
- "text_completion_response_queue": tc_response_queue,
+ "id": id,
}
)
- self.llm = LlmClient(
- subscriber=subscriber,
- input_queue=tc_request_queue,
- output_queue=tc_response_queue,
- pulsar_host = self.pulsar_host,
- pulsar_api_key=self.pulsar_api_key,
+ self.register_specification(
+ ConsumerSpec(
+ name = "request",
+ schema = PromptRequest,
+ handler = self.on_request
+ )
)
- # System prompt hack
- class Llm:
- def __init__(self, llm):
- self.llm = llm
- def request(self, system, prompt):
- print(system)
- print(prompt, flush=True)
- return self.llm.request(system, prompt)
+ self.register_specification(
+ TextCompletionClientSpec(
+ request_name = "text-completion-request",
+ response_name = "text-completion-response",
+ )
+ )
- self.llm = Llm(self.llm)
+ self.register_specification(
+ ProducerSpec(
+ name = "response",
+ schema = PromptResponse
+ )
+ )
+ self.register_config_handler(self.on_prompt_config)
+
+ # Null configuration, should reload quickly
self.manager = PromptManager(
- llm = self.llm,
- config = prompt_configuration,
+ config = PromptConfiguration("", {}, {})
)
- async def handle(self, msg):
+ async def on_prompt_config(self, config, version):
+
+ print("Loading configuration version", version)
+
+ if self.config_key not in config:
+ print(f"No key {self.config_key} in config", flush=True)
+ return
+
+ config = config[self.config_key]
+
+ try:
+
+ system = json.loads(config["system"])
+ ix = json.loads(config["template-index"])
+
+ prompts = {}
+
+ for k in ix:
+
+ pc = config[f"template.{k}"]
+ data = json.loads(pc)
+
+ prompt = data.get("prompt")
+ rtype = data.get("response-type", "text")
+ schema = data.get("schema", None)
+
+ prompts[k] = Prompt(
+ template = prompt,
+ response_type = rtype,
+ schema = schema,
+ terms = {}
+ )
+
+ self.manager = PromptManager(
+ PromptConfiguration(
+ system,
+ {},
+ prompts
+ )
+ )
+
+ print("Prompt configuration reloaded.", flush=True)
+
+ except Exception as e:
+
+ print("Exception:", e, flush=True)
+ print("Configuration reload failed", flush=True)
+
+ async def on_request(self, msg, consumer, flow):
v = msg.value()
@@ -168,7 +123,7 @@ class Processor(ConsumerProducer):
try:
- print(v.terms)
+ print(v.terms, flush=True)
input = {
k: json.loads(v)
@@ -176,14 +131,33 @@ class Processor(ConsumerProducer):
}
print(f"Handling kind {kind}...", flush=True)
- print(input, flush=True)
- resp = self.manager.invoke(kind, input)
+ async def llm(system, prompt):
+
+ print(system, flush=True)
+ print(prompt, flush=True)
+
+ resp = await flow("text-completion-request").text_completion(
+ system = system, prompt = prompt,
+ )
+
+ try:
+ return resp
+ except Exception as e:
+ print("LLM Exception:", e, flush=True)
+ return None
+
+ try:
+ resp = await self.manager.invoke(kind, input, llm)
+ except Exception as e:
+ print("Invocation exception:", e, flush=True)
+ raise e
+
+ print(resp, flush=True)
if isinstance(resp, str):
print("Send text response...", flush=True)
- print(resp, flush=True)
r = PromptResponse(
text=resp,
@@ -191,7 +165,7 @@ class Processor(ConsumerProducer):
error=None,
)
- await self.send(r, properties={"id": id})
+ await flow("response").send(r, properties={"id": id})
return
@@ -206,13 +180,13 @@ class Processor(ConsumerProducer):
error=None,
)
- await self.send(r, properties={"id": id})
+ await flow("response").send(r, properties={"id": id})
return
except Exception as e:
- print(f"Exception: {e}")
+ print(f"Exception: {e}", flush=True)
print("Send error response...", flush=True)
@@ -224,11 +198,11 @@ class Processor(ConsumerProducer):
response=None,
)
- await self.send(r, properties={"id": id})
+ await flow("response").send(r, properties={"id": id})
except Exception as e:
- print(f"Exception: {e}")
+ print(f"Exception: {e}", flush=True)
print("Send error response...", flush=True)
@@ -245,54 +219,15 @@ class Processor(ConsumerProducer):
@staticmethod
def add_args(parser):
- ConsumerProducer.add_args(
- parser, default_input_queue, default_subscriber,
- default_output_queue,
- )
+ FlowProcessor.add_args(parser)
parser.add_argument(
- '--text-completion-request-queue',
- default=text_completion_request_queue,
- help=f'Text completion request queue (default: {text_completion_request_queue})',
- )
-
- parser.add_argument(
- '--text-completion-response-queue',
- default=text_completion_response_queue,
- help=f'Text completion response queue (default: {text_completion_response_queue})',
- )
-
- parser.add_argument(
- '--prompt', nargs='*',
- help=f'Prompt template form id=template',
- )
-
- parser.add_argument(
- '--prompt-response-type', nargs='*',
- help=f'Prompt response type, form id=json|text',
- )
-
- parser.add_argument(
- '--prompt-term', nargs='*',
- help=f'Prompt response type, form id=key:value',
- )
-
- parser.add_argument(
- '--prompt-schema', nargs='*',
- help=f'Prompt response schema, form id=schema',
- )
-
- parser.add_argument(
- '--system-prompt',
- help=f'System prompt template',
- )
-
- parser.add_argument(
- '--global-term', nargs='+',
- help=f'Global term, form key:value'
+ '--config-type',
+ default="prompt",
+ help=f'Configuration key for prompts (default: prompt)',
)
def run():
- Processor.launch(module, __doc__)
+ Processor.launch(default_ident, __doc__)
diff --git a/trustgraph-flow/trustgraph/model/text_completion/azure/llm.py b/trustgraph-flow/trustgraph/model/text_completion/azure/llm.py
index 33840378..70b07606 100755
--- a/trustgraph-flow/trustgraph/model/text_completion/azure/llm.py
+++ b/trustgraph-flow/trustgraph/model/text_completion/azure/llm.py
@@ -9,31 +9,21 @@ import json
from prometheus_client import Histogram
import os
-from .... schema import TextCompletionRequest, TextCompletionResponse, Error
-from .... schema import text_completion_request_queue
-from .... schema import text_completion_response_queue
-from .... log_level import LogLevel
-from .... base import ConsumerProducer
from .... exceptions import TooManyRequests
+from .... base import LlmService, LlmResult
-module = ".".join(__name__.split(".")[1:-1])
+default_ident = "text-completion"
-default_input_queue = text_completion_request_queue
-default_output_queue = text_completion_response_queue
-default_subscriber = module
default_temperature = 0.0
default_max_output = 4192
default_model = "AzureAI"
default_endpoint = os.getenv("AZURE_ENDPOINT")
default_token = os.getenv("AZURE_TOKEN")
-class Processor(ConsumerProducer):
+class Processor(LlmService):
def __init__(self, **params):
- input_queue = params.get("input_queue", default_input_queue)
- output_queue = params.get("output_queue", default_output_queue)
- subscriber = params.get("subscriber", default_subscriber)
endpoint = params.get("endpoint", default_endpoint)
token = params.get("token", default_token)
temperature = params.get("temperature", default_temperature)
@@ -48,30 +38,13 @@ class Processor(ConsumerProducer):
super(Processor, self).__init__(
**params | {
- "input_queue": input_queue,
- "output_queue": output_queue,
- "subscriber": subscriber,
- "input_schema": TextCompletionRequest,
- "output_schema": TextCompletionResponse,
+ "endpoint": endpoint,
"temperature": temperature,
"max_output": max_output,
"model": model,
}
)
- if not hasattr(__class__, "text_completion_metric"):
- __class__.text_completion_metric = Histogram(
- 'text_completion_duration',
- 'Text completion duration (seconds)',
- buckets=[
- 0.25, 0.5, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0,
- 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0,
- 30.0, 35.0, 40.0, 45.0, 50.0, 60.0, 80.0, 100.0,
- 120.0
- ]
- )
-
self.endpoint = endpoint
self.token = token
self.temperature = temperature
@@ -123,25 +96,16 @@ class Processor(ConsumerProducer):
return result
- async def handle(self, msg):
-
- v = msg.value()
-
- # Sender-produced ID
-
- id = msg.properties()["id"]
-
- print(f"Handling prompt {id}...", flush=True)
+ async def generate_content(self, system, prompt):
try:
prompt = self.build_prompt(
- v.system,
- v.prompt
+ system,
+ prompt
)
- with __class__.text_completion_metric.time():
- response = self.call_llm(prompt)
+ response = self.call_llm(prompt)
resp = response['choices'][0]['message']['content']
inputtokens = response['usage']['prompt_tokens']
@@ -153,8 +117,14 @@ class Processor(ConsumerProducer):
print("Send response...", flush=True)
- r = TextCompletionResponse(response=resp, error=None, in_token=inputtokens, out_token=outputtokens, model=self.model)
- await self.send(r, properties={"id": id})
+ resp = LlmResult(
+ text = resp,
+ in_token = inputtokens,
+ out_token = outputtokens,
+ model = self.model
+ )
+
+ return resp
except TooManyRequests:
@@ -168,33 +138,14 @@ class Processor(ConsumerProducer):
# Apart from rate limits, treat all exceptions as unrecoverable
print(f"Exception: {e}")
-
- print("Send error response...", flush=True)
-
- r = TextCompletionResponse(
- error=Error(
- type = "llm-error",
- message = str(e),
- ),
- response=None,
- in_token=None,
- out_token=None,
- model=None,
- )
-
- await self.send(r, properties={"id": id})
-
- self.consumer.acknowledge(msg)
+ raise e
print("Done.", flush=True)
@staticmethod
def add_args(parser):
- ConsumerProducer.add_args(
- parser, default_input_queue, default_subscriber,
- default_output_queue,
- )
+ LlmService.add_args(parser)
parser.add_argument(
'-e', '--endpoint',
@@ -224,4 +175,4 @@ class Processor(ConsumerProducer):
def run():
- Processor.launch(module, __doc__)
+ Processor.launch(default_ident, __doc__)
diff --git a/trustgraph-flow/trustgraph/model/text_completion/azure_openai/llm.py b/trustgraph-flow/trustgraph/model/text_completion/azure_openai/llm.py
index 252d58ad..c5dd097c 100755
--- a/trustgraph-flow/trustgraph/model/text_completion/azure_openai/llm.py
+++ b/trustgraph-flow/trustgraph/model/text_completion/azure_openai/llm.py
@@ -9,18 +9,11 @@ from prometheus_client import Histogram
from openai import AzureOpenAI, RateLimitError
import os
-from .... schema import TextCompletionRequest, TextCompletionResponse, Error
-from .... schema import text_completion_request_queue
-from .... schema import text_completion_response_queue
-from .... log_level import LogLevel
-from .... base import ConsumerProducer
from .... exceptions import TooManyRequests
+from .... base import LlmService, LlmResult
-module = ".".join(__name__.split(".")[1:-1])
+default_ident = "text-completion"
-default_input_queue = text_completion_request_queue
-default_output_queue = text_completion_response_queue
-default_subscriber = module
default_temperature = 0.0
default_max_output = 4192
default_api = "2024-12-01-preview"
@@ -28,13 +21,10 @@ default_endpoint = os.getenv("AZURE_ENDPOINT", None)
default_token = os.getenv("AZURE_TOKEN", None)
default_model = os.getenv("AZURE_MODEL", None)
-class Processor(ConsumerProducer):
+class Processor(LlmService):
def __init__(self, **params):
- input_queue = params.get("input_queue", default_input_queue)
- output_queue = params.get("output_queue", default_output_queue)
- subscriber = params.get("subscriber", default_subscriber)
temperature = params.get("temperature", default_temperature)
max_output = params.get("max_output", default_max_output)
@@ -51,11 +41,6 @@ class Processor(ConsumerProducer):
super(Processor, self).__init__(
**params | {
- "input_queue": input_queue,
- "output_queue": output_queue,
- "subscriber": subscriber,
- "input_schema": TextCompletionRequest,
- "output_schema": TextCompletionResponse,
"temperature": temperature,
"max_output": max_output,
"model": model,
@@ -63,19 +48,6 @@ class Processor(ConsumerProducer):
}
)
- if not hasattr(__class__, "text_completion_metric"):
- __class__.text_completion_metric = Histogram(
- 'text_completion_duration',
- 'Text completion duration (seconds)',
- buckets=[
- 0.25, 0.5, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0,
- 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0,
- 30.0, 35.0, 40.0, 45.0, 50.0, 60.0, 80.0, 100.0,
- 120.0
- ]
- )
-
self.temperature = temperature
self.max_output = max_output
self.model = model
@@ -84,41 +56,31 @@ class Processor(ConsumerProducer):
api_key=token,
api_version=api,
azure_endpoint = endpoint,
- )
+ )
- async def handle(self, msg):
-
- v = msg.value()
-
- # Sender-produced ID
-
- id = msg.properties()["id"]
-
- print(f"Handling prompt {id}...", flush=True)
-
- prompt = v.system + "\n\n" + v.prompt
+ async def generate_content(self, system, prompt):
+ prompt = system + "\n\n" + prompt
try:
- with __class__.text_completion_metric.time():
- resp = self.openai.chat.completions.create(
- model=self.model,
- messages=[
- {
- "role": "user",
- "content": [
- {
- "type": "text",
- "text": prompt
- }
- ]
- }
- ],
- temperature=self.temperature,
- max_tokens=self.max_output,
- top_p=1,
- )
+ resp = self.openai.chat.completions.create(
+ model=self.model,
+ messages=[
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "text",
+ "text": prompt
+ }
+ ]
+ }
+ ],
+ temperature=self.temperature,
+ max_tokens=self.max_output,
+ top_p=1,
+ )
inputtokens = resp.usage.prompt_tokens
outputtokens = resp.usage.completion_tokens
@@ -127,15 +89,14 @@ class Processor(ConsumerProducer):
print(f"Output Tokens: {outputtokens}", flush=True)
print("Send response...", flush=True)
- r = TextCompletionResponse(
- response=resp.choices[0].message.content,
- error=None,
- in_token=inputtokens,
- out_token=outputtokens,
- model=self.model
+ r = LlmResult(
+ text = resp.choices[0].message.content,
+ in_token = inputtokens,
+ out_token = outputtokens,
+ model = self.model
)
- await self.send(r, properties={"id": id})
+ return r
except RateLimitError:
@@ -147,35 +108,15 @@ class Processor(ConsumerProducer):
except Exception as e:
# Apart from rate limits, treat all exceptions as unrecoverable
-
print(f"Exception: {e}")
-
- print("Send error response...", flush=True)
-
- r = TextCompletionResponse(
- error=Error(
- type = "llm-error",
- message = str(e),
- ),
- response=None,
- in_token=None,
- out_token=None,
- model=None,
- )
-
- await self.send(r, properties={"id": id})
-
- self.consumer.acknowledge(msg)
+ raise e
print("Done.", flush=True)
@staticmethod
def add_args(parser):
- ConsumerProducer.add_args(
- parser, default_input_queue, default_subscriber,
- default_output_queue,
- )
+ LlmService.add_args(parser)
parser.add_argument(
'-e', '--endpoint',
@@ -217,4 +158,4 @@ class Processor(ConsumerProducer):
def run():
- Processor.launch(module, __doc__)
+ Processor.launch(default_ident, __doc__)
diff --git a/trustgraph-flow/trustgraph/model/text_completion/claude/llm.py b/trustgraph-flow/trustgraph/model/text_completion/claude/llm.py
index 195a39e4..e69c2095 100755
--- a/trustgraph-flow/trustgraph/model/text_completion/claude/llm.py
+++ b/trustgraph-flow/trustgraph/model/text_completion/claude/llm.py
@@ -5,33 +5,22 @@ Input is prompt, output is response.
"""
import anthropic
-from prometheus_client import Histogram
import os
-from .... schema import TextCompletionRequest, TextCompletionResponse, Error
-from .... schema import text_completion_request_queue
-from .... schema import text_completion_response_queue
-from .... log_level import LogLevel
-from .... base import ConsumerProducer
from .... exceptions import TooManyRequests
+from .... base import LlmService, LlmResult
-module = ".".join(__name__.split(".")[1:-1])
+default_ident = "text-completion"
-default_input_queue = text_completion_request_queue
-default_output_queue = text_completion_response_queue
-default_subscriber = module
default_model = 'claude-3-5-sonnet-20240620'
default_temperature = 0.0
default_max_output = 8192
default_api_key = os.getenv("CLAUDE_KEY")
-class Processor(ConsumerProducer):
+class Processor(LlmService):
def __init__(self, **params):
- input_queue = params.get("input_queue", default_input_queue)
- output_queue = params.get("output_queue", default_output_queue)
- subscriber = params.get("subscriber", default_subscriber)
model = params.get("model", default_model)
api_key = params.get("api_key", default_api_key)
temperature = params.get("temperature", default_temperature)
@@ -42,30 +31,12 @@ class Processor(ConsumerProducer):
super(Processor, self).__init__(
**params | {
- "input_queue": input_queue,
- "output_queue": output_queue,
- "subscriber": subscriber,
- "input_schema": TextCompletionRequest,
- "output_schema": TextCompletionResponse,
"model": model,
"temperature": temperature,
"max_output": max_output,
}
)
- if not hasattr(__class__, "text_completion_metric"):
- __class__.text_completion_metric = Histogram(
- 'text_completion_duration',
- 'Text completion duration (seconds)',
- buckets=[
- 0.25, 0.5, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0,
- 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0,
- 30.0, 35.0, 40.0, 45.0, 50.0, 60.0, 80.0, 100.0,
- 120.0
- ]
- )
-
self.model = model
self.claude = anthropic.Anthropic(api_key=api_key)
self.temperature = temperature
@@ -73,39 +44,27 @@ class Processor(ConsumerProducer):
print("Initialised", flush=True)
- async def handle(self, msg):
-
- v = msg.value()
-
- # Sender-produced ID
-
- id = msg.properties()["id"]
-
- print(f"Handling prompt {id}...", flush=True)
-
- prompt = v.prompt
+ async def generate_content(self, system, prompt):
try:
- with __class__.text_completion_metric.time():
-
- response = message = self.claude.messages.create(
- model=self.model,
- max_tokens=self.max_output,
- temperature=self.temperature,
- system = v.system,
- messages=[
- {
- "role": "user",
- "content": [
- {
- "type": "text",
- "text": prompt
- }
- ]
- }
- ]
- )
+ response = message = self.claude.messages.create(
+ model=self.model,
+ max_tokens=self.max_output,
+ temperature=self.temperature,
+ system = system,
+ messages=[
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "text",
+ "text": prompt
+ }
+ ]
+ }
+ ]
+ )
resp = response.content[0].text
inputtokens = response.usage.input_tokens
@@ -114,17 +73,14 @@ class Processor(ConsumerProducer):
print(f"Input Tokens: {inputtokens}", flush=True)
print(f"Output Tokens: {outputtokens}", flush=True)
- print("Send response...", flush=True)
- r = TextCompletionResponse(
- response=resp,
- error=None,
- in_token=inputtokens,
- out_token=outputtokens,
- model=self.model
+ resp = LlmResult(
+ text = resp,
+ in_token = inputtokens,
+ out_token = outputtokens,
+ model = self.model
)
- self.send(r, properties={"id": id})
- print("Done.", flush=True)
+ return resp
except anthropic.RateLimitError:
@@ -136,31 +92,12 @@ class Processor(ConsumerProducer):
# Apart from rate limits, treat all exceptions as unrecoverable
print(f"Exception: {e}")
-
- print("Send error response...", flush=True)
-
- r = TextCompletionResponse(
- error=Error(
- type = "llm-error",
- message = str(e),
- ),
- response=None,
- in_token=None,
- out_token=None,
- model=None,
- )
-
- await self.send(r, properties={"id": id})
-
- self.consumer.acknowledge(msg)
+ raise e
@staticmethod
def add_args(parser):
- ConsumerProducer.add_args(
- parser, default_input_queue, default_subscriber,
- default_output_queue,
- )
+ LlmService.add_args(parser)
parser.add_argument(
'-m', '--model',
@@ -189,7 +126,5 @@ class Processor(ConsumerProducer):
)
def run():
-
- Processor.launch(module, __doc__)
-
+ Processor.launch(default_ident, __doc__)
diff --git a/trustgraph-flow/trustgraph/model/text_completion/cohere/llm.py b/trustgraph-flow/trustgraph/model/text_completion/cohere/llm.py
index d5dab142..8e583040 100755
--- a/trustgraph-flow/trustgraph/model/text_completion/cohere/llm.py
+++ b/trustgraph-flow/trustgraph/model/text_completion/cohere/llm.py
@@ -8,29 +8,19 @@ import cohere
from prometheus_client import Histogram
import os
-from .... schema import TextCompletionRequest, TextCompletionResponse, Error
-from .... schema import text_completion_request_queue
-from .... schema import text_completion_response_queue
-from .... log_level import LogLevel
-from .... base import ConsumerProducer
from .... exceptions import TooManyRequests
+from .... base import LlmService, LlmResult
-module = ".".join(__name__.split(".")[1:-1])
+default_ident = "text-completion"
-default_input_queue = text_completion_request_queue
-default_output_queue = text_completion_response_queue
-default_subscriber = module
default_model = 'c4ai-aya-23-8b'
default_temperature = 0.0
default_api_key = os.getenv("COHERE_KEY")
-class Processor(ConsumerProducer):
+class Processor(LlmService):
def __init__(self, **params):
- input_queue = params.get("input_queue", default_input_queue)
- output_queue = params.get("output_queue", default_output_queue)
- subscriber = params.get("subscriber", default_subscriber)
model = params.get("model", default_model)
api_key = params.get("api_key", default_api_key)
temperature = params.get("temperature", default_temperature)
@@ -40,61 +30,30 @@ class Processor(ConsumerProducer):
super(Processor, self).__init__(
**params | {
- "input_queue": input_queue,
- "output_queue": output_queue,
- "subscriber": subscriber,
- "input_schema": TextCompletionRequest,
- "output_schema": TextCompletionResponse,
"model": model,
"temperature": temperature,
}
)
- if not hasattr(__class__, "text_completion_metric"):
- __class__.text_completion_metric = Histogram(
- 'text_completion_duration',
- 'Text completion duration (seconds)',
- buckets=[
- 0.25, 0.5, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0,
- 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0,
- 30.0, 35.0, 40.0, 45.0, 50.0, 60.0, 80.0, 100.0,
- 120.0
- ]
- )
-
self.model = model
self.temperature = temperature
self.cohere = cohere.Client(api_key=api_key)
print("Initialised", flush=True)
- async def handle(self, msg):
-
- v = msg.value()
-
- # Sender-produced ID
-
- id = msg.properties()["id"]
-
- print(f"Handling prompt {id}...", flush=True)
-
- system = v.system
- prompt = v.prompt
+ async def generate_content(self, system, prompt):
try:
- with __class__.text_completion_metric.time():
-
- output = self.cohere.chat(
- model=self.model,
- message=prompt,
- preamble = system,
- temperature=self.temperature,
- chat_history=[],
- prompt_truncation='auto',
- connectors=[]
- )
+ output = self.cohere.chat(
+ model=self.model,
+ message=prompt,
+ preamble = system,
+ temperature=self.temperature,
+ chat_history=[],
+ prompt_truncation='auto',
+ connectors=[]
+ )
resp = output.text
inputtokens = int(output.meta.billed_units.input_tokens)
@@ -104,11 +63,14 @@ class Processor(ConsumerProducer):
print(f"Input Tokens: {inputtokens}", flush=True)
print(f"Output Tokens: {outputtokens}", flush=True)
- print("Send response...", flush=True)
- r = TextCompletionResponse(response=resp, error=None, in_token=inputtokens, out_token=outputtokens, model=self.model)
- self.await send(r, properties={"id": id})
+ resp = LlmResult(
+ text = resp,
+ in_token = inputtokens,
+ out_token = outputtokens,
+ model = self.model
+ )
- print("Done.", flush=True)
+ return resp
# FIXME: Wrong exception, don't know what this LLM throws
# for a rate limit
@@ -122,31 +84,12 @@ class Processor(ConsumerProducer):
# Apart from rate limits, treat all exceptions as unrecoverable
print(f"Exception: {e}")
-
- print("Send error response...", flush=True)
-
- r = TextCompletionResponse(
- error=Error(
- type = "llm-error",
- message = str(e),
- ),
- response=None,
- in_token=None,
- out_token=None,
- model=None,
- )
-
- await self.send(r, properties={"id": id})
-
- self.consumer.acknowledge(msg)
+ raise e
@staticmethod
def add_args(parser):
- ConsumerProducer.add_args(
- parser, default_input_queue, default_subscriber,
- default_output_queue,
- )
+ LlmService.add_args(parser)
parser.add_argument(
'-m', '--model',
@@ -168,7 +111,5 @@ class Processor(ConsumerProducer):
)
def run():
-
- Processor.launch(module, __doc__)
-
+ Processor.launch(default_ident, __doc__)
diff --git a/trustgraph-flow/trustgraph/model/text_completion/googleaistudio/llm.py b/trustgraph-flow/trustgraph/model/text_completion/googleaistudio/llm.py
index 98ecaf0e..051e2fe5 100644
--- a/trustgraph-flow/trustgraph/model/text_completion/googleaistudio/llm.py
+++ b/trustgraph-flow/trustgraph/model/text_completion/googleaistudio/llm.py
@@ -10,30 +10,20 @@ from google.api_core.exceptions import ResourceExhausted
from prometheus_client import Histogram
import os
-from .... schema import TextCompletionRequest, TextCompletionResponse, Error
-from .... schema import text_completion_request_queue
-from .... schema import text_completion_response_queue
-from .... log_level import LogLevel
-from .... base import ConsumerProducer
from .... exceptions import TooManyRequests
+from .... base import LlmService, LlmResult
-module = ".".join(__name__.split(".")[1:-1])
+default_ident = "text-completion"
-default_input_queue = text_completion_request_queue
-default_output_queue = text_completion_response_queue
-default_subscriber = module
default_model = 'gemini-1.5-flash-002'
default_temperature = 0.0
default_max_output = 8192
default_api_key = os.getenv("GOOGLE_AI_STUDIO_KEY")
-class Processor(ConsumerProducer):
+class Processor(LlmService):
def __init__(self, **params):
- input_queue = params.get("input_queue", default_input_queue)
- output_queue = params.get("output_queue", default_output_queue)
- subscriber = params.get("subscriber", default_subscriber)
model = params.get("model", default_model)
api_key = params.get("api_key", default_api_key)
temperature = params.get("temperature", default_temperature)
@@ -44,30 +34,12 @@ class Processor(ConsumerProducer):
super(Processor, self).__init__(
**params | {
- "input_queue": input_queue,
- "output_queue": output_queue,
- "subscriber": subscriber,
- "input_schema": TextCompletionRequest,
- "output_schema": TextCompletionResponse,
"model": model,
"temperature": temperature,
"max_output": max_output,
}
)
- if not hasattr(__class__, "text_completion_metric"):
- __class__.text_completion_metric = Histogram(
- 'text_completion_duration',
- 'Text completion duration (seconds)',
- buckets=[
- 0.25, 0.5, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0,
- 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0,
- 30.0, 35.0, 40.0, 45.0, 50.0, 60.0, 80.0, 100.0,
- 120.0
- ]
- )
-
genai.configure(api_key=api_key)
self.model = model
self.temperature = temperature
@@ -102,15 +74,7 @@ class Processor(ConsumerProducer):
print("Initialised", flush=True)
- async def handle(self, msg):
-
- v = msg.value()
-
- # Sender-produced ID
-
- id = msg.properties()["id"]
-
- print(f"Handling prompt {id}...", flush=True)
+ async def generate_content(self, system, prompt):
# FIXME: There's a system prompt above. Maybe if system changes,
# then reset self.llm? It shouldn't do, because system prompt
@@ -119,17 +83,15 @@ class Processor(ConsumerProducer):
# Or... could keep different LLM structures for different system
# prompts?
- prompt = v.system + "\n\n" + v.prompt
+ prompt = system + "\n\n" + prompt
try:
- with __class__.text_completion_metric.time():
-
- chat_session = self.llm.start_chat(
- history=[
- ]
- )
- response = chat_session.send_message(prompt)
+ chat_session = self.llm.start_chat(
+ history=[
+ ]
+ )
+ response = chat_session.send_message(prompt)
resp = response.text
inputtokens = int(response.usage_metadata.prompt_token_count)
@@ -138,17 +100,14 @@ class Processor(ConsumerProducer):
print(f"Input Tokens: {inputtokens}", flush=True)
print(f"Output Tokens: {outputtokens}", flush=True)
- print("Send response...", flush=True)
- r = TextCompletionResponse(
- response=resp,
- error=None,
- in_token=inputtokens,
- out_token=outputtokens,
- model=self.model
+ resp = LlmResult(
+ text = resp,
+ in_token = inputtokens,
+ out_token = outputtokens,
+ model = self.model
)
- await self.send(r, properties={"id": id})
- print("Done.", flush=True)
+ return resp
except ResourceExhausted as e:
@@ -163,31 +122,12 @@ class Processor(ConsumerProducer):
print(type(e), flush=True)
print(f"Exception: {e}", flush=True)
-
- print("Send error response...", flush=True)
-
- r = TextCompletionResponse(
- error=Error(
- type = "llm-error",
- message = str(e),
- ),
- response=None,
- in_token=None,
- out_token=None,
- model=None,
- )
-
- await self.send(r, properties={"id": id})
-
- self.consumer.acknowledge(msg)
+ raise e
@staticmethod
def add_args(parser):
- ConsumerProducer.add_args(
- parser, default_input_queue, default_subscriber,
- default_output_queue,
- )
+ LlmService.add_args(parser)
parser.add_argument(
'-m', '--model',
@@ -216,7 +156,5 @@ class Processor(ConsumerProducer):
)
def run():
-
- Processor.launch(module, __doc__)
-
+ Processor.launch(default_ident, __doc__)
diff --git a/trustgraph-flow/trustgraph/model/text_completion/llamafile/llm.py b/trustgraph-flow/trustgraph/model/text_completion/llamafile/llm.py
index 483412a2..baede64c 100755
--- a/trustgraph-flow/trustgraph/model/text_completion/llamafile/llm.py
+++ b/trustgraph-flow/trustgraph/model/text_completion/llamafile/llm.py
@@ -5,32 +5,22 @@ Input is prompt, output is response.
"""
from openai import OpenAI
-from prometheus_client import Histogram
+import os
-from .... schema import TextCompletionRequest, TextCompletionResponse, Error
-from .... schema import text_completion_request_queue
-from .... schema import text_completion_response_queue
-from .... log_level import LogLevel
-from .... base import ConsumerProducer
from .... exceptions import TooManyRequests
+from .... base import LlmService, LlmResult
-module = ".".join(__name__.split(".")[1:-1])
+default_ident = "text-completion"
-default_input_queue = text_completion_request_queue
-default_output_queue = text_completion_response_queue
-default_subscriber = module
default_model = 'LLaMA_CPP'
default_llamafile = os.getenv("LLAMAFILE_URL", "http://localhost:8080/v1")
default_temperature = 0.0
default_max_output = 4096
-class Processor(ConsumerProducer):
+class Processor(LlmService):
def __init__(self, **params):
- input_queue = params.get("input_queue", default_input_queue)
- output_queue = params.get("output_queue", default_output_queue)
- subscriber = params.get("subscriber", default_subscriber)
model = params.get("model", default_model)
llamafile = params.get("llamafile", default_llamafile)
temperature = params.get("temperature", default_temperature)
@@ -38,11 +28,6 @@ class Processor(ConsumerProducer):
super(Processor, self).__init__(
**params | {
- "input_queue": input_queue,
- "output_queue": output_queue,
- "subscriber": subscriber,
- "input_schema": TextCompletionRequest,
- "output_schema": TextCompletionResponse,
"model": model,
"temperature": temperature,
"max_output": max_output,
@@ -50,19 +35,6 @@ class Processor(ConsumerProducer):
}
)
- if not hasattr(__class__, "text_completion_metric"):
- __class__.text_completion_metric = Histogram(
- 'text_completion_duration',
- 'Text completion duration (seconds)',
- buckets=[
- 0.25, 0.5, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0,
- 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0,
- 30.0, 35.0, 40.0, 45.0, 50.0, 60.0, 80.0, 100.0,
- 120.0
- ]
- )
-
self.model = model
self.llamafile=llamafile
self.temperature = temperature
@@ -74,38 +46,26 @@ class Processor(ConsumerProducer):
print("Initialised", flush=True)
- async def handle(self, msg):
+ async def generate_content(self, system, prompt):
- v = msg.value()
-
- # Sender-produced ID
-
- id = msg.properties()["id"]
-
- print(f"Handling prompt {id}...", flush=True)
-
- prompt = v.system + "\n\n" + v.prompt
+ prompt = system + "\n\n" + prompt
try:
- # FIXME: Rate limits
-
- with __class__.text_completion_metric.time():
-
- resp = self.openai.chat.completions.create(
- model=self.model,
- messages=[
- {"role": "user", "content": prompt}
- ]
- #temperature=self.temperature,
- #max_tokens=self.max_output,
- #top_p=1,
- #frequency_penalty=0,
- #presence_penalty=0,
- #response_format={
- # "type": "text"
- #}
- )
+ resp = self.openai.chat.completions.create(
+ model=self.model,
+ messages=[
+ {"role": "user", "content": prompt}
+ ]
+ #temperature=self.temperature,
+ #max_tokens=self.max_output,
+ #top_p=1,
+ #frequency_penalty=0,
+ #presence_penalty=0,
+ #response_format={
+ # "type": "text"
+ #}
+ )
inputtokens = resp.usage.prompt_tokens
outputtokens = resp.usage.completion_tokens
@@ -114,48 +74,26 @@ class Processor(ConsumerProducer):
print(f"Input Tokens: {inputtokens}", flush=True)
print(f"Output Tokens: {outputtokens}", flush=True)
- print("Send response...", flush=True)
- r = TextCompletionResponse(
- response=resp.choices[0].message.content,
- error=None,
- in_token=inputtokens,
- out_token=outputtokens,
- model="llama.cpp"
+ resp = LlmResult(
+ text = resp.choices[0].message.content,
+ in_token = inputtokens,
+ out_token = outputtokens,
+ model = "llama.cpp",
)
- await self.send(r, properties={"id": id})
- print("Done.", flush=True)
+ return resp
# SLM, presumably there aren't rate limits
except Exception as e:
print(f"Exception: {e}")
-
- print("Send error response...", flush=True)
-
- r = TextCompletionResponse(
- error=Error(
- type = "llm-error",
- message = str(e),
- ),
- response=None,
- in_token=None,
- out_token=None,
- model=None,
- )
-
- await self.send(r, properties={"id": id})
-
- self.consumer.acknowledge(msg)
+ raise e
@staticmethod
def add_args(parser):
- ConsumerProducer.add_args(
- parser, default_input_queue, default_subscriber,
- default_output_queue,
- )
+ LlmService.add_args(parser)
parser.add_argument(
'-m', '--model',
@@ -184,7 +122,5 @@ class Processor(ConsumerProducer):
)
def run():
-
- Processor.launch(module, __doc__)
-
+ Processor.launch(default_ident, __doc__)
diff --git a/trustgraph-flow/trustgraph/model/text_completion/lmstudio/llm.py b/trustgraph-flow/trustgraph/model/text_completion/lmstudio/llm.py
index 16ff2df4..db1ec00e 100755
--- a/trustgraph-flow/trustgraph/model/text_completion/lmstudio/llm.py
+++ b/trustgraph-flow/trustgraph/model/text_completion/lmstudio/llm.py
@@ -5,33 +5,22 @@ Input is prompt, output is response.
"""
from openai import OpenAI
-from prometheus_client import Histogram
import os
-from .... schema import TextCompletionRequest, TextCompletionResponse, Error
-from .... schema import text_completion_request_queue
-from .... schema import text_completion_response_queue
-from .... log_level import LogLevel
-from .... base import ConsumerProducer
from .... exceptions import TooManyRequests
+from .... base import LlmService, LlmResult
-module = ".".join(__name__.split(".")[1:-1])
+default_ident = "text-completion"
-default_input_queue = text_completion_request_queue
-default_output_queue = text_completion_response_queue
-default_subscriber = module
default_model = 'gemma3:9b'
default_url = os.getenv("LMSTUDIO_URL", "http://localhost:1234/")
default_temperature = 0.0
default_max_output = 4096
-class Processor(ConsumerProducer):
+class Processor(LlmService):
def __init__(self, **params):
- input_queue = params.get("input_queue", default_input_queue)
- output_queue = params.get("output_queue", default_output_queue)
- subscriber = params.get("subscriber", default_subscriber)
model = params.get("model", default_model)
url = params.get("url", default_url)
temperature = params.get("temperature", default_temperature)
@@ -39,11 +28,6 @@ class Processor(ConsumerProducer):
super(Processor, self).__init__(
**params | {
- "input_queue": input_queue,
- "output_queue": output_queue,
- "subscriber": subscriber,
- "input_schema": TextCompletionRequest,
- "output_schema": TextCompletionResponse,
"model": model,
"temperature": temperature,
"max_output": max_output,
@@ -51,19 +35,6 @@ class Processor(ConsumerProducer):
}
)
- if not hasattr(__class__, "text_completion_metric"):
- __class__.text_completion_metric = Histogram(
- 'text_completion_duration',
- 'Text completion duration (seconds)',
- buckets=[
- 0.25, 0.5, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0,
- 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0,
- 30.0, 35.0, 40.0, 45.0, 50.0, 60.0, 80.0, 100.0,
- 120.0
- ]
- )
-
self.model = model
self.url = url + "v1/"
self.temperature = temperature
@@ -75,42 +46,30 @@ class Processor(ConsumerProducer):
print("Initialised", flush=True)
- async def handle(self, msg):
+ async def generate_content(self, system, prompt):
- v = msg.value()
-
- # Sender-produced ID
-
- id = msg.properties()["id"]
-
- print(f"Handling prompt {id}...", flush=True)
-
- prompt = v.system + "\n\n" + v.prompt
+ prompt = system + "\n\n" + prompt
try:
- # FIXME: Rate limits
+ print(prompt)
- with __class__.text_completion_metric.time():
+ resp = self.openai.chat.completions.create(
+ model=self.model,
+ messages=[
+ {"role": "user", "content": prompt}
+ ]
+ #temperature=self.temperature,
+ #max_tokens=self.max_output,
+ #top_p=1,
+ #frequency_penalty=0,
+ #presence_penalty=0,
+ #response_format={
+ # "type": "text"
+ #}
+ )
- print(prompt)
-
- resp = self.openai.chat.completions.create(
- model=self.model,
- messages=[
- {"role": "user", "content": prompt}
- ]
- #temperature=self.temperature,
- #max_tokens=self.max_output,
- #top_p=1,
- #frequency_penalty=0,
- #presence_penalty=0,
- #response_format={
- # "type": "text"
- #}
- )
-
- print(resp)
+ print(resp)
inputtokens = resp.usage.prompt_tokens
outputtokens = resp.usage.completion_tokens
@@ -119,48 +78,26 @@ class Processor(ConsumerProducer):
print(f"Input Tokens: {inputtokens}", flush=True)
print(f"Output Tokens: {outputtokens}", flush=True)
- print("Send response...", flush=True)
- r = TextCompletionResponse(
- response=resp.choices[0].message.content,
- error=None,
- in_token=inputtokens,
- out_token=outputtokens,
- model=self.model,
+ resp = LlmResult(
+ text = resp.choices[0].message.content,
+ in_token = inputtokens,
+ out_token = outputtokens,
+ model = self.model
)
- await self.send(r, properties={"id": id})
- print("Done.", flush=True)
+ return resp
# SLM, presumably there aren't rate limits
except Exception as e:
print(f"Exception: {e}")
-
- print("Send error response...", flush=True)
-
- r = TextCompletionResponse(
- error=Error(
- type = "llm-error",
- message = str(e),
- ),
- response=None,
- in_token=None,
- out_token=None,
- model=None,
- )
-
- await self.send(r, properties={"id": id})
-
- self.consumer.acknowledge(msg)
+ raise e
@staticmethod
def add_args(parser):
- ConsumerProducer.add_args(
- parser, default_input_queue, default_subscriber,
- default_output_queue,
- )
+ LlmService.add_args(parser)
parser.add_argument(
'-m', '--model',
@@ -189,5 +126,5 @@ class Processor(ConsumerProducer):
)
def run():
- Processor.launch(module, __doc__)
-
+
+ Processor.launch(default_ident, __doc__)
diff --git a/trustgraph-flow/trustgraph/model/text_completion/mistral/llm.py b/trustgraph-flow/trustgraph/model/text_completion/mistral/llm.py
index 45f1311c..0c5c1430 100755
--- a/trustgraph-flow/trustgraph/model/text_completion/mistral/llm.py
+++ b/trustgraph-flow/trustgraph/model/text_completion/mistral/llm.py
@@ -5,33 +5,22 @@ Input is prompt, output is response.
"""
from mistralai import Mistral
-from prometheus_client import Histogram
import os
-from .... schema import TextCompletionRequest, TextCompletionResponse, Error
-from .... schema import text_completion_request_queue
-from .... schema import text_completion_response_queue
-from .... log_level import LogLevel
-from .... base import ConsumerProducer
from .... exceptions import TooManyRequests
+from .... base import LlmService, LlmResult
-module = ".".join(__name__.split(".")[1:-1])
+default_ident = "text-completion"
-default_input_queue = text_completion_request_queue
-default_output_queue = text_completion_response_queue
-default_subscriber = module
default_model = 'ministral-8b-latest'
default_temperature = 0.0
default_max_output = 4096
default_api_key = os.getenv("MISTRAL_TOKEN")
-class Processor(ConsumerProducer):
+class Processor(LlmService):
def __init__(self, **params):
- input_queue = params.get("input_queue", default_input_queue)
- output_queue = params.get("output_queue", default_output_queue)
- subscriber = params.get("subscriber", default_subscriber)
model = params.get("model", default_model)
api_key = params.get("api_key", default_api_key)
temperature = params.get("temperature", default_temperature)
@@ -42,30 +31,12 @@ class Processor(ConsumerProducer):
super(Processor, self).__init__(
**params | {
- "input_queue": input_queue,
- "output_queue": output_queue,
- "subscriber": subscriber,
- "input_schema": TextCompletionRequest,
- "output_schema": TextCompletionResponse,
"model": model,
"temperature": temperature,
"max_output": max_output,
}
)
- if not hasattr(__class__, "text_completion_metric"):
- __class__.text_completion_metric = Histogram(
- 'text_completion_duration',
- 'Text completion duration (seconds)',
- buckets=[
- 0.25, 0.5, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0,
- 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0,
- 30.0, 35.0, 40.0, 45.0, 50.0, 60.0, 80.0, 100.0,
- 120.0
- ]
- )
-
self.model = model
self.temperature = temperature
self.max_output = max_output
@@ -73,44 +44,34 @@ class Processor(ConsumerProducer):
print("Initialised", flush=True)
- async def handle(self, msg):
+ async def generate_content(self, system, prompt):
- v = msg.value()
-
- # Sender-produced ID
-
- id = msg.properties()["id"]
-
- print(f"Handling prompt {id}...", flush=True)
-
- prompt = v.system + "\n\n" + v.prompt
+ prompt = system + "\n\n" + prompt
try:
- with __class__.text_completion_metric.time():
-
- resp = self.mistral.chat.complete(
- model=self.model,
- messages=[
- {
- "role": "user",
- "content": [
- {
- "type": "text",
- "text": prompt
- }
- ]
- }
- ],
- temperature=self.temperature,
- max_tokens=self.max_output,
- top_p=1,
- frequency_penalty=0,
- presence_penalty=0,
- response_format={
- "type": "text"
+ resp = self.mistral.chat.complete(
+ model=self.model,
+ messages=[
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "text",
+ "text": prompt
+ }
+ ]
}
- )
+ ],
+ temperature=self.temperature,
+ max_tokens=self.max_output,
+ top_p=1,
+ frequency_penalty=0,
+ presence_penalty=0,
+ response_format={
+ "type": "text"
+ }
+ )
inputtokens = resp.usage.prompt_tokens
outputtokens = resp.usage.completion_tokens
@@ -118,17 +79,14 @@ class Processor(ConsumerProducer):
print(f"Input Tokens: {inputtokens}", flush=True)
print(f"Output Tokens: {outputtokens}", flush=True)
- print("Send response...", flush=True)
- r = TextCompletionResponse(
- response=resp.choices[0].message.content,
- error=None,
- in_token=inputtokens,
- out_token=outputtokens,
- model=self.model
+ resp = LlmResult(
+ text = resp.choices[0].message.content,
+ in_token = inputtokens,
+ out_token = outputtokens,
+ model = self.model
)
- await self.send(r, properties={"id": id})
- print("Done.", flush=True)
+ return resp
# FIXME: Wrong exception. The MistralAI library has retry logic
# so retry-able errors are retried transparently. It means we
@@ -148,31 +106,12 @@ class Processor(ConsumerProducer):
# Apart from rate limits, treat all exceptions as unrecoverable
print(f"Exception: {e}")
-
- print("Send error response...", flush=True)
-
- r = TextCompletionResponse(
- error=Error(
- type = "llm-error",
- message = str(e),
- ),
- response=None,
- in_token=None,
- out_token=None,
- model=None,
- )
-
- await self.send(r, properties={"id": id})
-
- self.consumer.acknowledge(msg)
+ raise e
@staticmethod
def add_args(parser):
- ConsumerProducer.add_args(
- parser, default_input_queue, default_subscriber,
- default_output_queue,
- )
+ LlmService.add_args(parser)
parser.add_argument(
'-m', '--model',
@@ -201,7 +140,5 @@ class Processor(ConsumerProducer):
)
def run():
-
- Processor.launch(module, __doc__)
-
+ Processor.launch(default_ident, __doc__)
diff --git a/trustgraph-flow/trustgraph/model/text_completion/ollama/llm.py b/trustgraph-flow/trustgraph/model/text_completion/ollama/llm.py
index 6d825bac..6afe0aea 100755
--- a/trustgraph-flow/trustgraph/model/text_completion/ollama/llm.py
+++ b/trustgraph-flow/trustgraph/model/text_completion/ollama/llm.py
@@ -5,87 +5,40 @@ Input is prompt, output is response.
"""
from ollama import Client
-from prometheus_client import Histogram, Info
import os
-from .... schema import TextCompletionRequest, TextCompletionResponse, Error
-from .... schema import text_completion_request_queue
-from .... schema import text_completion_response_queue
-from .... log_level import LogLevel
-from .... base import ConsumerProducer
from .... exceptions import TooManyRequests
+from .... base import LlmService, LlmResult
-module = ".".join(__name__.split(".")[1:-1])
+default_ident = "text-completion"
-default_input_queue = text_completion_request_queue
-default_output_queue = text_completion_response_queue
-default_subscriber = module
default_model = 'gemma2:9b'
default_ollama = os.getenv("OLLAMA_HOST", 'http://localhost:11434')
-class Processor(ConsumerProducer):
+class Processor(LlmService):
def __init__(self, **params):
- input_queue = params.get("input_queue", default_input_queue)
- output_queue = params.get("output_queue", default_output_queue)
- subscriber = params.get("subscriber", default_subscriber)
model = params.get("model", default_model)
ollama = params.get("ollama", default_ollama)
super(Processor, self).__init__(
**params | {
- "input_queue": input_queue,
- "output_queue": output_queue,
- "subscriber": subscriber,
"model": model,
"ollama": ollama,
- "input_schema": TextCompletionRequest,
- "output_schema": TextCompletionResponse,
}
)
- if not hasattr(__class__, "text_completion_metric"):
- __class__.text_completion_metric = Histogram(
- 'text_completion_duration',
- 'Text completion duration (seconds)',
- buckets=[
- 0.25, 0.5, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0,
- 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0,
- 30.0, 35.0, 40.0, 45.0, 50.0, 60.0, 80.0, 100.0,
- 120.0
- ]
- )
-
- if not hasattr(__class__, "model_metric"):
- __class__.model_metric = Info(
- 'model', 'Model information'
- )
-
- __class__.model_metric.info({
- "model": model,
- "ollama": ollama,
- })
-
self.model = model
self.llm = Client(host=ollama)
- async def handle(self, msg):
+ async def generate_content(self, system, prompt):
- v = msg.value()
-
- # Sender-produced ID
- id = msg.properties()["id"]
-
- print(f"Handling prompt {id}...", flush=True)
-
- prompt = v.system + "\n\n" + v.prompt
+ prompt = system + "\n\n" + prompt
try:
- with __class__.text_completion_metric.time():
- response = self.llm.generate(self.model, prompt)
+ response = self.llm.generate(self.model, prompt)
response_text = response['response']
print("Send response...", flush=True)
@@ -94,42 +47,26 @@ class Processor(ConsumerProducer):
inputtokens = int(response['prompt_eval_count'])
outputtokens = int(response['eval_count'])
- r = TextCompletionResponse(response=response_text, error=None, in_token=inputtokens, out_token=outputtokens, model="ollama")
+ resp = LlmResult(
+ text = response_text,
+ in_token = inputtokens,
+ out_token = outputtokens,
+ model = self.model
+ )
- await self.send(r, properties={"id": id})
-
- print("Done.", flush=True)
+ return resp
# SLM, presumably no rate limits
except Exception as e:
print(f"Exception: {e}")
-
- print("Send error response...", flush=True)
-
- r = TextCompletionResponse(
- error=Error(
- type = "llm-error",
- message = str(e),
- ),
- response=None,
- in_token=None,
- out_token=None,
- model=None,
- )
-
- await self.send(r, properties={"id": id})
-
- self.consumer.acknowledge(msg)
+ raise e
@staticmethod
def add_args(parser):
- ConsumerProducer.add_args(
- parser, default_input_queue, default_subscriber,
- default_output_queue,
- )
+ LlmService.add_args(parser)
parser.add_argument(
'-m', '--model',
@@ -145,6 +82,4 @@ class Processor(ConsumerProducer):
def run():
- Processor.launch(module, __doc__)
-
-
+ Processor.launch(default_ident, __doc__)
diff --git a/trustgraph-flow/trustgraph/model/text_completion/openai/llm.py b/trustgraph-flow/trustgraph/model/text_completion/openai/llm.py
index 590c2e3f..a52f400e 100755
--- a/trustgraph-flow/trustgraph/model/text_completion/openai/llm.py
+++ b/trustgraph-flow/trustgraph/model/text_completion/openai/llm.py
@@ -5,34 +5,23 @@ Input is prompt, output is response.
"""
from openai import OpenAI, RateLimitError
-from prometheus_client import Histogram
import os
-from .... schema import TextCompletionRequest, TextCompletionResponse, Error
-from .... schema import text_completion_request_queue
-from .... schema import text_completion_response_queue
-from .... log_level import LogLevel
-from .... base import ConsumerProducer
from .... exceptions import TooManyRequests
+from .... base import LlmService, LlmResult
-module = ".".join(__name__.split(".")[1:-1])
+default_ident = "text-completion"
-default_input_queue = text_completion_request_queue
-default_output_queue = text_completion_response_queue
-default_subscriber = module
default_model = 'gpt-3.5-turbo'
default_temperature = 0.0
default_max_output = 4096
default_api_key = os.getenv("OPENAI_TOKEN")
default_base_url = os.getenv("OPENAI_BASE_URL", None)
-class Processor(ConsumerProducer):
+class Processor(LlmService):
def __init__(self, **params):
- input_queue = params.get("input_queue", default_input_queue)
- output_queue = params.get("output_queue", default_output_queue)
- subscriber = params.get("subscriber", default_subscriber)
model = params.get("model", default_model)
api_key = params.get("api_key", default_api_key)
base_url = params.get("base_url", default_base_url)
@@ -44,11 +33,6 @@ class Processor(ConsumerProducer):
super(Processor, self).__init__(
**params | {
- "input_queue": input_queue,
- "output_queue": output_queue,
- "subscriber": subscriber,
- "input_schema": TextCompletionRequest,
- "output_schema": TextCompletionResponse,
"model": model,
"temperature": temperature,
"max_output": max_output,
@@ -56,19 +40,6 @@ class Processor(ConsumerProducer):
}
)
- if not hasattr(__class__, "text_completion_metric"):
- __class__.text_completion_metric = Histogram(
- 'text_completion_duration',
- 'Text completion duration (seconds)',
- buckets=[
- 0.25, 0.5, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0,
- 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0,
- 30.0, 35.0, 40.0, 45.0, 50.0, 60.0, 80.0, 100.0,
- 120.0
- ]
- )
-
self.model = model
self.temperature = temperature
self.max_output = max_output
@@ -76,44 +47,34 @@ class Processor(ConsumerProducer):
print("Initialised", flush=True)
- async def handle(self, msg):
+ async def generate_content(self, system, prompt):
- v = msg.value()
-
- # Sender-produced ID
-
- id = msg.properties()["id"]
-
- print(f"Handling prompt {id}...", flush=True)
-
- prompt = v.system + "\n\n" + v.prompt
+ prompt = system + "\n\n" + prompt
try:
- with __class__.text_completion_metric.time():
-
- resp = self.openai.chat.completions.create(
- model=self.model,
- messages=[
- {
- "role": "user",
- "content": [
- {
- "type": "text",
- "text": prompt
- }
- ]
- }
- ],
- temperature=self.temperature,
- max_tokens=self.max_output,
- top_p=1,
- frequency_penalty=0,
- presence_penalty=0,
- response_format={
- "type": "text"
+ resp = self.openai.chat.completions.create(
+ model=self.model,
+ messages=[
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "text",
+ "text": prompt
+ }
+ ]
}
- )
+ ],
+ temperature=self.temperature,
+ max_tokens=self.max_output,
+ top_p=1,
+ frequency_penalty=0,
+ presence_penalty=0,
+ response_format={
+ "type": "text"
+ }
+ )
inputtokens = resp.usage.prompt_tokens
outputtokens = resp.usage.completion_tokens
@@ -121,17 +82,14 @@ class Processor(ConsumerProducer):
print(f"Input Tokens: {inputtokens}", flush=True)
print(f"Output Tokens: {outputtokens}", flush=True)
- print("Send response...", flush=True)
- r = TextCompletionResponse(
- response=resp.choices[0].message.content,
- error=None,
- in_token=inputtokens,
- out_token=outputtokens,
- model=self.model
+ resp = LlmResult(
+ text = resp.choices[0].message.content,
+ in_token = inputtokens,
+ out_token = outputtokens,
+ model = self.model
)
- await self.send(r, properties={"id": id})
- print("Done.", flush=True)
+ return resp
# FIXME: Wrong exception, don't know what this LLM throws
# for a rate limit
@@ -145,31 +103,12 @@ class Processor(ConsumerProducer):
# Apart from rate limits, treat all exceptions as unrecoverable
print(f"Exception: {e}")
-
- print("Send error response...", flush=True)
-
- r = TextCompletionResponse(
- error=Error(
- type = "llm-error",
- message = str(e),
- ),
- response=None,
- in_token=None,
- out_token=None,
- model=None,
- )
-
- await self.send(r, properties={"id": id})
-
- self.consumer.acknowledge(msg)
+ raise e
@staticmethod
def add_args(parser):
- ConsumerProducer.add_args(
- parser, default_input_queue, default_subscriber,
- default_output_queue,
- )
+ LlmService.add_args(parser)
parser.add_argument(
'-m', '--model',
@@ -204,7 +143,5 @@ class Processor(ConsumerProducer):
)
def run():
-
- Processor.launch(module, __doc__)
-
+ Processor.launch(default_ident, __doc__)
diff --git a/trustgraph-flow/trustgraph/query/doc_embeddings/milvus/service.py b/trustgraph-flow/trustgraph/query/doc_embeddings/milvus/service.py
index b16399e9..2fb416dd 100755
--- a/trustgraph-flow/trustgraph/query/doc_embeddings/milvus/service.py
+++ b/trustgraph-flow/trustgraph/query/doc_embeddings/milvus/service.py
@@ -11,7 +11,7 @@ from .... schema import document_embeddings_request_queue
from .... schema import document_embeddings_response_queue
from .... base import ConsumerProducer
-module = ".".join(__name__.split(".")[1:-1])
+module = "de-query"
default_input_queue = document_embeddings_request_queue
default_output_queue = document_embeddings_response_queue
diff --git a/trustgraph-flow/trustgraph/query/doc_embeddings/pinecone/service.py b/trustgraph-flow/trustgraph/query/doc_embeddings/pinecone/service.py
index 6a88671c..74c52055 100755
--- a/trustgraph-flow/trustgraph/query/doc_embeddings/pinecone/service.py
+++ b/trustgraph-flow/trustgraph/query/doc_embeddings/pinecone/service.py
@@ -16,7 +16,7 @@ from .... schema import document_embeddings_request_queue
from .... schema import document_embeddings_response_queue
from .... base import ConsumerProducer
-module = ".".join(__name__.split(".")[1:-1])
+module = "de-query"
default_input_queue = document_embeddings_request_queue
default_output_queue = document_embeddings_response_queue
diff --git a/trustgraph-flow/trustgraph/query/doc_embeddings/qdrant/service.py b/trustgraph-flow/trustgraph/query/doc_embeddings/qdrant/service.py
index 128203ad..c5543690 100755
--- a/trustgraph-flow/trustgraph/query/doc_embeddings/qdrant/service.py
+++ b/trustgraph-flow/trustgraph/query/doc_embeddings/qdrant/service.py
@@ -7,71 +7,51 @@ of chunks
from qdrant_client import QdrantClient
from qdrant_client.models import PointStruct
from qdrant_client.models import Distance, VectorParams
-import uuid
-from .... schema import DocumentEmbeddingsRequest, DocumentEmbeddingsResponse
+from .... schema import DocumentEmbeddingsResponse
from .... schema import Error, Value
-from .... schema import document_embeddings_request_queue
-from .... schema import document_embeddings_response_queue
-from .... base import ConsumerProducer
+from .... base import DocumentEmbeddingsQueryService
-module = ".".join(__name__.split(".")[1:-1])
+default_ident = "de-query"
-default_input_queue = document_embeddings_request_queue
-default_output_queue = document_embeddings_response_queue
-default_subscriber = module
default_store_uri = 'http://localhost:6333'
-class Processor(ConsumerProducer):
+class Processor(DocumentEmbeddingsQueryService):
def __init__(self, **params):
- input_queue = params.get("input_queue", default_input_queue)
- output_queue = params.get("output_queue", default_output_queue)
- subscriber = params.get("subscriber", default_subscriber)
store_uri = params.get("store_uri", default_store_uri)
+
#optional api key
api_key = params.get("api_key", None)
super(Processor, self).__init__(
**params | {
- "input_queue": input_queue,
- "output_queue": output_queue,
- "subscriber": subscriber,
- "input_schema": DocumentEmbeddingsRequest,
- "output_schema": DocumentEmbeddingsResponse,
"store_uri": store_uri,
"api_key": api_key,
}
)
- self.client = QdrantClient(url=store_uri, api_key=api_key)
+ self.qdrant = QdrantClient(url=store_uri, api_key=api_key)
- async def handle(self, msg):
+ async def query_document_embeddings(self, msg):
try:
- v = msg.value()
-
- # Sender-produced ID
- id = msg.properties()["id"]
-
- print(f"Handling input {id}...", flush=True)
-
chunks = []
- for vec in v.vectors:
+ for vec in msg.vectors:
dim = len(vec)
collection = (
- "d_" + v.user + "_" + v.collection + "_" +
+ "d_" + msg.user + "_" + msg.collection + "_" +
str(dim)
)
- search_result = self.client.query_points(
+ search_result = self.qdrant.query_points(
collection_name=collection,
query=vec,
- limit=v.limit,
+ limit=msg.limit,
with_payload=True,
).points
@@ -79,37 +59,17 @@ class Processor(ConsumerProducer):
ent = r.payload["doc"]
chunks.append(ent)
- print("Send response...", flush=True)
- r = DocumentEmbeddingsResponse(documents=chunks, error=None)
- await self.send(r, properties={"id": id})
-
- print("Done.", flush=True)
+ return chunks
except Exception as e:
print(f"Exception: {e}")
-
- print("Send error response...", flush=True)
-
- r = DocumentEmbeddingsResponse(
- error=Error(
- type = "llm-error",
- message = str(e),
- ),
- documents=None,
- )
-
- await self.send(r, properties={"id": id})
-
- self.consumer.acknowledge(msg)
+ raise e
@staticmethod
def add_args(parser):
- ConsumerProducer.add_args(
- parser, default_input_queue, default_subscriber,
- default_output_queue,
- )
+ DocumentEmbeddingsQueryService.add_args(parser)
parser.add_argument(
'-t', '--store-uri',
@@ -125,5 +85,5 @@ class Processor(ConsumerProducer):
def run():
- Processor.launch(module, __doc__)
+ Processor.launch(default_ident, __doc__)
diff --git a/trustgraph-flow/trustgraph/query/graph_embeddings/milvus/service.py b/trustgraph-flow/trustgraph/query/graph_embeddings/milvus/service.py
index 8dd8d04d..d2cec084 100755
--- a/trustgraph-flow/trustgraph/query/graph_embeddings/milvus/service.py
+++ b/trustgraph-flow/trustgraph/query/graph_embeddings/milvus/service.py
@@ -11,7 +11,7 @@ from .... schema import graph_embeddings_request_queue
from .... schema import graph_embeddings_response_queue
from .... base import ConsumerProducer
-module = ".".join(__name__.split(".")[1:-1])
+module = "ge-query"
default_input_queue = graph_embeddings_request_queue
default_output_queue = graph_embeddings_response_queue
diff --git a/trustgraph-flow/trustgraph/query/graph_embeddings/pinecone/service.py b/trustgraph-flow/trustgraph/query/graph_embeddings/pinecone/service.py
index 90cfc6de..942a1e69 100755
--- a/trustgraph-flow/trustgraph/query/graph_embeddings/pinecone/service.py
+++ b/trustgraph-flow/trustgraph/query/graph_embeddings/pinecone/service.py
@@ -16,7 +16,7 @@ from .... schema import graph_embeddings_request_queue
from .... schema import graph_embeddings_response_queue
from .... base import ConsumerProducer
-module = ".".join(__name__.split(".")[1:-1])
+module = "ge-query"
default_input_queue = graph_embeddings_request_queue
default_output_queue = graph_embeddings_response_queue
diff --git a/trustgraph-flow/trustgraph/query/graph_embeddings/qdrant/service.py b/trustgraph-flow/trustgraph/query/graph_embeddings/qdrant/service.py
index dc3e28f3..32da00e5 100755
--- a/trustgraph-flow/trustgraph/query/graph_embeddings/qdrant/service.py
+++ b/trustgraph-flow/trustgraph/query/graph_embeddings/qdrant/service.py
@@ -7,44 +7,32 @@ entities
from qdrant_client import QdrantClient
from qdrant_client.models import PointStruct
from qdrant_client.models import Distance, VectorParams
-import uuid
-from .... schema import GraphEmbeddingsRequest, GraphEmbeddingsResponse
+from .... schema import GraphEmbeddingsResponse
from .... schema import Error, Value
-from .... schema import graph_embeddings_request_queue
-from .... schema import graph_embeddings_response_queue
-from .... base import ConsumerProducer
+from .... base import GraphEmbeddingsQueryService
-module = ".".join(__name__.split(".")[1:-1])
+default_ident = "ge-query"
-default_input_queue = graph_embeddings_request_queue
-default_output_queue = graph_embeddings_response_queue
-default_subscriber = module
default_store_uri = 'http://localhost:6333'
-class Processor(ConsumerProducer):
+class Processor(GraphEmbeddingsQueryService):
def __init__(self, **params):
- input_queue = params.get("input_queue", default_input_queue)
- output_queue = params.get("output_queue", default_output_queue)
- subscriber = params.get("subscriber", default_subscriber)
store_uri = params.get("store_uri", default_store_uri)
+
+ #optional api key
api_key = params.get("api_key", None)
super(Processor, self).__init__(
**params | {
- "input_queue": input_queue,
- "output_queue": output_queue,
- "subscriber": subscriber,
- "input_schema": GraphEmbeddingsRequest,
- "output_schema": GraphEmbeddingsResponse,
"store_uri": store_uri,
"api_key": api_key,
}
)
- self.client = QdrantClient(url=store_uri, api_key=api_key)
+ self.qdrant = QdrantClient(url=store_uri, api_key=api_key)
def create_value(self, ent):
if ent.startswith("http://") or ent.startswith("https://"):
@@ -52,34 +40,27 @@ class Processor(ConsumerProducer):
else:
return Value(value=ent, is_uri=False)
- async def handle(self, msg):
+ async def query_graph_embeddings(self, msg):
try:
- v = msg.value()
-
- # Sender-produced ID
- id = msg.properties()["id"]
-
- print(f"Handling input {id}...", flush=True)
-
entity_set = set()
entities = []
- for vec in v.vectors:
+ for vec in msg.vectors:
dim = len(vec)
collection = (
- "t_" + v.user + "_" + v.collection + "_" +
+ "t_" + msg.user + "_" + msg.collection + "_" +
str(dim)
)
# Heuristic hack, get (2*limit), so that we have more chance
# of getting (limit) entities
- search_result = self.client.query_points(
+ search_result = self.qdrant.query_points(
collection_name=collection,
query=vec,
- limit=v.limit * 2,
+ limit=msg.limit * 2,
with_payload=True,
).points
@@ -92,10 +73,10 @@ class Processor(ConsumerProducer):
entities.append(ent)
# Keep adding entities until limit
- if len(entity_set) >= v.limit: break
+ if len(entity_set) >= msg.limit: break
# Keep adding entities until limit
- if len(entity_set) >= v.limit: break
+ if len(entity_set) >= msg.limit: break
ents2 = []
@@ -105,36 +86,19 @@ class Processor(ConsumerProducer):
entities = ents2
print("Send response...", flush=True)
- r = GraphEmbeddingsResponse(entities=entities, error=None)
- await self.send(r, properties={"id": id})
+ return entities
print("Done.", flush=True)
except Exception as e:
print(f"Exception: {e}")
-
- print("Send error response...", flush=True)
-
- r = GraphEmbeddingsResponse(
- error=Error(
- type = "llm-error",
- message = str(e),
- ),
- entities=None,
- )
-
- await self.send(r, properties={"id": id})
-
- self.consumer.acknowledge(msg)
+ raise e
@staticmethod
def add_args(parser):
- ConsumerProducer.add_args(
- parser, default_input_queue, default_subscriber,
- default_output_queue,
- )
+ GraphEmbeddingsQueryService.add_args(parser)
parser.add_argument(
'-t', '--store-uri',
@@ -150,5 +114,5 @@ class Processor(ConsumerProducer):
def run():
- Processor.launch(module, __doc__)
+ Processor.launch(default_ident, __doc__)
diff --git a/trustgraph-flow/trustgraph/query/triples/cassandra/service.py b/trustgraph-flow/trustgraph/query/triples/cassandra/service.py
index e3687756..6fcf4a19 100755
--- a/trustgraph-flow/trustgraph/query/triples/cassandra/service.py
+++ b/trustgraph-flow/trustgraph/query/triples/cassandra/service.py
@@ -7,38 +7,24 @@ null. Output is a list of triples.
from .... direct.cassandra import TrustGraph
from .... schema import TriplesQueryRequest, TriplesQueryResponse, Error
from .... schema import Value, Triple
-from .... schema import triples_request_queue
-from .... schema import triples_response_queue
-from .... base import ConsumerProducer
+from .... base import TriplesQueryService
-module = ".".join(__name__.split(".")[1:-1])
+default_ident = "triples-query"
-default_input_queue = triples_request_queue
-default_output_queue = triples_response_queue
-default_subscriber = module
default_graph_host='localhost'
-class Processor(ConsumerProducer):
+class Processor(TriplesQueryService):
def __init__(self, **params):
- input_queue = params.get("input_queue", default_input_queue)
- output_queue = params.get("output_queue", default_output_queue)
- subscriber = params.get("subscriber", default_subscriber)
graph_host = params.get("graph_host", default_graph_host)
graph_username = params.get("graph_username", None)
graph_password = params.get("graph_password", None)
super(Processor, self).__init__(
**params | {
- "input_queue": input_queue,
- "output_queue": output_queue,
- "subscriber": subscriber,
- "input_schema": TriplesQueryRequest,
- "output_schema": TriplesQueryResponse,
"graph_host": graph_host,
"graph_username": graph_username,
- "graph_password": graph_password,
}
)
@@ -53,92 +39,85 @@ class Processor(ConsumerProducer):
else:
return Value(value=ent, is_uri=False)
- async def handle(self, msg):
+ async def query_triples(self, query):
try:
- v = msg.value()
-
- table = (v.user, v.collection)
+ table = (query.user, query.collection)
if table != self.table:
if self.username and self.password:
self.tg = TrustGraph(
hosts=self.graph_host,
- keyspace=v.user, table=v.collection,
+ keyspace=query.user, table=query.collection,
username=self.username, password=self.password
)
else:
self.tg = TrustGraph(
hosts=self.graph_host,
- keyspace=v.user, table=v.collection,
+ keyspace=query.user, table=query.collection,
)
self.table = table
- # Sender-produced ID
- id = msg.properties()["id"]
-
- print(f"Handling input {id}...", flush=True)
-
triples = []
- if v.s is not None:
- if v.p is not None:
- if v.o is not None:
+ if query.s is not None:
+ if query.p is not None:
+ if query.o is not None:
resp = self.tg.get_spo(
- v.s.value, v.p.value, v.o.value,
- limit=v.limit
+ query.s.value, query.p.value, query.o.value,
+ limit=query.limit
)
- triples.append((v.s.value, v.p.value, v.o.value))
+ triples.append((query.s.value, query.p.value, query.o.value))
else:
resp = self.tg.get_sp(
- v.s.value, v.p.value,
- limit=v.limit
+ query.s.value, query.p.value,
+ limit=query.limit
)
for t in resp:
- triples.append((v.s.value, v.p.value, t.o))
+ triples.append((query.s.value, query.p.value, t.o))
else:
- if v.o is not None:
+ if query.o is not None:
resp = self.tg.get_os(
- v.o.value, v.s.value,
- limit=v.limit
+ query.o.value, query.s.value,
+ limit=query.limit
)
for t in resp:
- triples.append((v.s.value, t.p, v.o.value))
+ triples.append((query.s.value, t.p, query.o.value))
else:
resp = self.tg.get_s(
- v.s.value,
- limit=v.limit
+ query.s.value,
+ limit=query.limit
)
for t in resp:
- triples.append((v.s.value, t.p, t.o))
+ triples.append((query.s.value, t.p, t.o))
else:
- if v.p is not None:
- if v.o is not None:
+ if query.p is not None:
+ if query.o is not None:
resp = self.tg.get_po(
- v.p.value, v.o.value,
- limit=v.limit
+ query.p.value, query.o.value,
+ limit=query.limit
)
for t in resp:
- triples.append((t.s, v.p.value, v.o.value))
+ triples.append((t.s, query.p.value, query.o.value))
else:
resp = self.tg.get_p(
- v.p.value,
- limit=v.limit
+ query.p.value,
+ limit=query.limit
)
for t in resp:
- triples.append((t.s, v.p.value, t.o))
+ triples.append((t.s, query.p.value, t.o))
else:
- if v.o is not None:
+ if query.o is not None:
resp = self.tg.get_o(
- v.o.value,
- limit=v.limit
+ query.o.value,
+ limit=query.limit
)
for t in resp:
- triples.append((t.s, t.p, v.o.value))
+ triples.append((t.s, t.p, query.o.value))
else:
resp = self.tg.get_all(
- limit=v.limit
+ limit=query.limit
)
for t in resp:
triples.append((t.s, t.p, t.o))
@@ -152,37 +131,17 @@ class Processor(ConsumerProducer):
for t in triples
]
- print("Send response...", flush=True)
- r = TriplesQueryResponse(triples=triples, error=None)
- await self.send(r, properties={"id": id})
-
- print("Done.", flush=True)
+ return triples
except Exception as e:
print(f"Exception: {e}")
-
- print("Send error response...", flush=True)
-
- r = TriplesQueryResponse(
- error=Error(
- type = "llm-error",
- message = str(e),
- ),
- response=None,
- )
-
- await self.send(r, properties={"id": id})
-
- self.consumer.acknowledge(msg)
+ raise e
@staticmethod
def add_args(parser):
- ConsumerProducer.add_args(
- parser, default_input_queue, default_subscriber,
- default_output_queue,
- )
+ TriplesQueryService.add_args(parser)
parser.add_argument(
'-g', '--graph-host',
@@ -205,5 +164,5 @@ class Processor(ConsumerProducer):
def run():
- Processor.launch(module, __doc__)
+ Processor.launch(default_ident, __doc__)
diff --git a/trustgraph-flow/trustgraph/query/triples/falkordb/service.py b/trustgraph-flow/trustgraph/query/triples/falkordb/service.py
index 56fed6d3..c62c28c1 100755
--- a/trustgraph-flow/trustgraph/query/triples/falkordb/service.py
+++ b/trustgraph-flow/trustgraph/query/triples/falkordb/service.py
@@ -13,7 +13,7 @@ from .... schema import triples_request_queue
from .... schema import triples_response_queue
from .... base import ConsumerProducer
-module = ".".join(__name__.split(".")[1:-1])
+module = "triples-query"
default_input_queue = triples_request_queue
default_output_queue = triples_response_queue
diff --git a/trustgraph-flow/trustgraph/query/triples/memgraph/service.py b/trustgraph-flow/trustgraph/query/triples/memgraph/service.py
index f442c4ef..594c9130 100755
--- a/trustgraph-flow/trustgraph/query/triples/memgraph/service.py
+++ b/trustgraph-flow/trustgraph/query/triples/memgraph/service.py
@@ -13,7 +13,7 @@ from .... schema import triples_request_queue
from .... schema import triples_response_queue
from .... base import ConsumerProducer
-module = ".".join(__name__.split(".")[1:-1])
+module = "triples-query"
default_input_queue = triples_request_queue
default_output_queue = triples_response_queue
diff --git a/trustgraph-flow/trustgraph/query/triples/neo4j/service.py b/trustgraph-flow/trustgraph/query/triples/neo4j/service.py
index 49ba0345..591361ce 100755
--- a/trustgraph-flow/trustgraph/query/triples/neo4j/service.py
+++ b/trustgraph-flow/trustgraph/query/triples/neo4j/service.py
@@ -13,7 +13,7 @@ from .... schema import triples_request_queue
from .... schema import triples_response_queue
from .... base import ConsumerProducer
-module = ".".join(__name__.split(".")[1:-1])
+module = "triples-query"
default_input_queue = triples_request_queue
default_output_queue = triples_response_queue
diff --git a/trustgraph-flow/trustgraph/retrieval/document_rag/document_rag.py b/trustgraph-flow/trustgraph/retrieval/document_rag/document_rag.py
new file mode 100644
index 00000000..5e3c9b41
--- /dev/null
+++ b/trustgraph-flow/trustgraph/retrieval/document_rag/document_rag.py
@@ -0,0 +1,94 @@
+
+import asyncio
+
+LABEL="http://www.w3.org/2000/01/rdf-schema#label"
+
+class Query:
+
+ def __init__(
+ self, rag, user, collection, verbose,
+ doc_limit=20
+ ):
+ self.rag = rag
+ self.user = user
+ self.collection = collection
+ self.verbose = verbose
+ self.doc_limit = doc_limit
+
+ async def get_vector(self, query):
+
+ if self.verbose:
+ print("Compute embeddings...", flush=True)
+
+ qembeds = await self.rag.embeddings_client.embed(query)
+
+ if self.verbose:
+ print("Done.", flush=True)
+
+ return qembeds
+
+ async def get_docs(self, query):
+
+ vectors = await self.get_vector(query)
+
+ if self.verbose:
+ print("Get docs...", flush=True)
+
+ docs = await self.rag.doc_embeddings_client.query(
+ vectors, limit=self.doc_limit,
+ user=self.user, collection=self.collection,
+ )
+
+ if self.verbose:
+ print("Docs:", flush=True)
+ for doc in docs:
+ print(doc, flush=True)
+
+ return docs
+
+class DocumentRag:
+
+ def __init__(
+ self, prompt_client, embeddings_client, doc_embeddings_client,
+ verbose=False,
+ ):
+
+ self.verbose = verbose
+
+ self.prompt_client = prompt_client
+ self.embeddings_client = embeddings_client
+ self.doc_embeddings_client = doc_embeddings_client
+
+ if self.verbose:
+ print("Initialised", flush=True)
+
+ async def query(
+ self, query, user="trustgraph", collection="default",
+ doc_limit=20,
+ ):
+
+ if self.verbose:
+ print("Construct prompt...", flush=True)
+
+ q = Query(
+ rag=self, user=user, collection=collection, verbose=self.verbose,
+ doc_limit=doc_limit
+ )
+
+ docs = await q.get_docs(query)
+
+ if self.verbose:
+ print("Invoke LLM...", flush=True)
+ print(docs)
+ print(query)
+
+ resp = await self.prompt_client.document_prompt(
+ query = query,
+ documents = docs
+ )
+
+ if self.verbose:
+ print("Done", flush=True)
+
+ return resp
+
diff --git a/trustgraph-flow/trustgraph/retrieval/document_rag/rag.py b/trustgraph-flow/trustgraph/retrieval/document_rag/rag.py
index bb8b008e..8c478874 100755
--- a/trustgraph-flow/trustgraph/retrieval/document_rag/rag.py
+++ b/trustgraph-flow/trustgraph/retrieval/document_rag/rag.py
@@ -5,88 +5,77 @@ Input is query, output is response.
"""
from ... schema import DocumentRagQuery, DocumentRagResponse, Error
-from ... schema import document_rag_request_queue, document_rag_response_queue
-from ... schema import prompt_request_queue
-from ... schema import prompt_response_queue
-from ... schema import embeddings_request_queue
-from ... schema import embeddings_response_queue
-from ... schema import document_embeddings_request_queue
-from ... schema import document_embeddings_response_queue
-from ... log_level import LogLevel
-from ... document_rag import DocumentRag
-from ... base import ConsumerProducer
+from . document_rag import DocumentRag
+from ... base import FlowProcessor, ConsumerSpec, ProducerSpec
+from ... base import PromptClientSpec, EmbeddingsClientSpec
+from ... base import DocumentEmbeddingsClientSpec
-module = ".".join(__name__.split(".")[1:-1])
+default_ident = "document-rag"
-default_input_queue = document_rag_request_queue
-default_output_queue = document_rag_response_queue
-default_subscriber = module
-
-class Processor(ConsumerProducer):
+class Processor(FlowProcessor):
def __init__(self, **params):
- input_queue = params.get("input_queue", default_input_queue)
- output_queue = params.get("output_queue", default_output_queue)
- subscriber = params.get("subscriber", default_subscriber)
- pr_request_queue = params.get(
- "prompt_request_queue", prompt_request_queue
- )
- pr_response_queue = params.get(
- "prompt_response_queue", prompt_response_queue
- )
- emb_request_queue = params.get(
- "embeddings_request_queue", embeddings_request_queue
- )
- emb_response_queue = params.get(
- "embeddings_response_queue", embeddings_response_queue
- )
- de_request_queue = params.get(
- "document_embeddings_request_queue",
- document_embeddings_request_queue
- )
- de_response_queue = params.get(
- "document_embeddings_response_queue",
- document_embeddings_response_queue
- )
+ id = params.get("id", default_ident)
- doc_limit = params.get("doc_limit", 10)
+ doc_limit = params.get("doc_limit", 5)
super(Processor, self).__init__(
**params | {
- "input_queue": input_queue,
- "output_queue": output_queue,
- "subscriber": subscriber,
- "input_schema": DocumentRagQuery,
- "output_schema": DocumentRagResponse,
- "prompt_request_queue": pr_request_queue,
- "prompt_response_queue": pr_response_queue,
- "embeddings_request_queue": emb_request_queue,
- "embeddings_response_queue": emb_response_queue,
- "document_embeddings_request_queue": de_request_queue,
- "document_embeddings_response_queue": de_response_queue,
+ "id": id,
+ "doc_limit": doc_limit,
}
)
- self.rag = DocumentRag(
- pulsar_host=self.pulsar_host,
- pulsar_api_key=self.pulsar_api_key,
- pr_request_queue=pr_request_queue,
- pr_response_queue=pr_response_queue,
- emb_request_queue=emb_request_queue,
- emb_response_queue=emb_response_queue,
- de_request_queue=de_request_queue,
- de_response_queue=de_response_queue,
- verbose=True,
- module=module,
- )
-
self.doc_limit = doc_limit
- async def handle(self, msg):
+ self.register_specification(
+ ConsumerSpec(
+ name = "request",
+ schema = DocumentRagQuery,
+ handler = self.on_request,
+ )
+ )
+
+ self.register_specification(
+ EmbeddingsClientSpec(
+ request_name = "embeddings-request",
+ response_name = "embeddings-response",
+ )
+ )
+
+ self.register_specification(
+ DocumentEmbeddingsClientSpec(
+ request_name = "document-embeddings-request",
+ response_name = "document-embeddings-response",
+ )
+ )
+
+ self.register_specification(
+ PromptClientSpec(
+ request_name = "prompt-request",
+ response_name = "prompt-response",
+ )
+ )
+
+ self.register_specification(
+ ProducerSpec(
+ name = "response",
+ schema = DocumentRagResponse,
+ )
+ )
+
+ async def on_request(self, msg, consumer, flow):
try:
+ self.rag = DocumentRag(
+ embeddings_client = flow("embeddings-request"),
+ doc_embeddings_client = flow("document-embeddings-request"),
+ prompt_client = flow("prompt-request"),
+ verbose=True,
+ )
+
v = msg.value()
# Sender-produced ID
@@ -99,11 +88,15 @@ class Processor(ConsumerProducer):
else:
doc_limit = self.doc_limit
- response = self.rag.query(v.query, doc_limit=doc_limit)
+ response = await self.rag.query(v.query, doc_limit=doc_limit)
- print("Send response...", flush=True)
- r = DocumentRagResponse(response = response, error=None)
- await self.send(r, properties={"id": id})
+ await flow("response").send(
+ DocumentRagResponse(
+ response = response,
+ error = None
+ ),
+ properties = {"id": id}
+ )
print("Done.", flush=True)
@@ -113,25 +106,21 @@ class Processor(ConsumerProducer):
print("Send error response...", flush=True)
- r = DocumentRagResponse(
- error=Error(
- type = "llm-error",
- message = str(e),
+ await flow("response").send(
+ DocumentRagResponse(
+ response = None,
+ error = Error(
+ type = "document-rag-error",
+ message = str(e),
+ ),
),
- response=None,
+ properties = {"id": id}
)
- await self.send(r, properties={"id": id})
-
- self.consumer.acknowledge(msg)
-
@staticmethod
def add_args(parser):
- ConsumerProducer.add_args(
- parser, default_input_queue, default_subscriber,
- default_output_queue,
- )
+ FlowProcessor.add_args(parser)
parser.add_argument(
'-d', '--doc-limit',
@@ -140,43 +129,7 @@ class Processor(ConsumerProducer):
help=f'Default document fetch limit (default: 10)'
)
- parser.add_argument(
- '--prompt-request-queue',
- default=prompt_request_queue,
- help=f'Prompt request queue (default: {prompt_request_queue})',
- )
-
- parser.add_argument(
- '--prompt-response-queue',
- default=prompt_response_queue,
- help=f'Prompt response queue (default: {prompt_response_queue})',
- )
-
- parser.add_argument(
- '--embeddings-request-queue',
- default=embeddings_request_queue,
- help=f'Embeddings request queue (default: {embeddings_request_queue})',
- )
-
- parser.add_argument(
- '--embeddings-response-queue',
- default=embeddings_response_queue,
- help=f'Embeddings response queue (default: {embeddings_response_queue})',
- )
-
- parser.add_argument(
- '--document-embeddings-request-queue',
- default=document_embeddings_request_queue,
- help=f'Document embeddings request queue (default: {document_embeddings_request_queue})',
- )
-
- parser.add_argument(
- '--document-embeddings-response-queue',
- default=document_embeddings_response_queue,
- help=f'Document embeddings response queue (default: {document_embeddings_response_queue})',
- )
-
def run():
- Processor.launch(module, __doc__)
+ Processor.launch(default_ident, __doc__)
diff --git a/trustgraph-flow/trustgraph/retrieval/graph_rag/graph_rag.py b/trustgraph-flow/trustgraph/retrieval/graph_rag/graph_rag.py
new file mode 100644
index 00000000..6879023a
--- /dev/null
+++ b/trustgraph-flow/trustgraph/retrieval/graph_rag/graph_rag.py
@@ -0,0 +1,218 @@
+
+import asyncio
+
+LABEL="http://www.w3.org/2000/01/rdf-schema#label"
+
+class Query:
+
+ def __init__(
+ self, rag, user, collection, verbose,
+ entity_limit=50, triple_limit=30, max_subgraph_size=1000,
+ max_path_length=2,
+ ):
+ self.rag = rag
+ self.user = user
+ self.collection = collection
+ self.verbose = verbose
+ self.entity_limit = entity_limit
+ self.triple_limit = triple_limit
+ self.max_subgraph_size = max_subgraph_size
+ self.max_path_length = max_path_length
+
+ async def get_vector(self, query):
+
+ if self.verbose:
+ print("Compute embeddings...", flush=True)
+
+ qembeds = await self.rag.embeddings_client.embed(query)
+
+ if self.verbose:
+ print("Done.", flush=True)
+
+ return qembeds
+
+ async def get_entities(self, query):
+
+ vectors = await self.get_vector(query)
+
+ if self.verbose:
+ print("Get entities...", flush=True)
+
+ entities = await self.rag.graph_embeddings_client.query(
+ vectors=vectors, limit=self.entity_limit,
+ user=self.user, collection=self.collection,
+ )
+
+ entities = [
+ str(e)
+ for e in entities
+ ]
+
+ if self.verbose:
+ print("Entities:", flush=True)
+ for ent in entities:
+ print(" ", ent, flush=True)
+
+ return entities
+
+ async def maybe_label(self, e):
+
+ if e in self.rag.label_cache:
+ return self.rag.label_cache[e]
+
+ res = await self.rag.triples_client.query(
+ s=e, p=LABEL, o=None, limit=1,
+ user=self.user, collection=self.collection,
+ )
+
+ if len(res) == 0:
+ self.rag.label_cache[e] = e
+ return e
+
+ self.rag.label_cache[e] = str(res[0].o)
+ return self.rag.label_cache[e]
+
+ async def follow_edges(self, ent, subgraph, path_length):
+
+ # Not needed?
+ if path_length <= 0:
+ return
+
+ # Stop spanning around if the subgraph is already maxed out
+ if len(subgraph) >= self.max_subgraph_size:
+ return
+
+ res = await self.rag.triples_client.query(
+ s=ent, p=None, o=None,
+ limit=self.triple_limit,
+ user=self.user, collection=self.collection,
+ )
+
+ for triple in res:
+ subgraph.add(
+ (str(triple.s), str(triple.p), str(triple.o))
+ )
+ if path_length > 1:
+ await self.follow_edges(str(triple.o), subgraph, path_length-1)
+
+ res = await self.rag.triples_client.query(
+ s=None, p=ent, o=None,
+ limit=self.triple_limit,
+ user=self.user, collection=self.collection,
+ )
+
+ for triple in res:
+ subgraph.add(
+ (str(triple.s), str(triple.p), str(triple.o))
+ )
+
+ res = await self.rag.triples_client.query(
+ s=None, p=None, o=ent,
+ limit=self.triple_limit,
+ user=self.user, collection=self.collection,
+ )
+
+ for triple in res:
+ subgraph.add(
+ (str(triple.s), str(triple.p), str(triple.o))
+ )
+ if path_length > 1:
+ await self.follow_edges(
+ str(triple.s), subgraph, path_length-1
+ )
+
+ async def get_subgraph(self, query):
+
+ entities = await self.get_entities(query)
+
+ if self.verbose:
+ print("Get subgraph...", flush=True)
+
+ subgraph = set()
+
+ for ent in entities:
+ await self.follow_edges(ent, subgraph, self.max_path_length)
+
+ subgraph = list(subgraph)
+
+ return subgraph
+
+ async def get_labelgraph(self, query):
+
+ subgraph = await self.get_subgraph(query)
+
+ sg2 = []
+
+ for edge in subgraph:
+
+ if edge[1] == LABEL:
+ continue
+
+ s = await self.maybe_label(edge[0])
+ p = await self.maybe_label(edge[1])
+ o = await self.maybe_label(edge[2])
+
+ sg2.append((s, p, o))
+
+ sg2 = sg2[0:self.max_subgraph_size]
+
+ if self.verbose:
+ print("Subgraph:", flush=True)
+ for edge in sg2:
+ print(" ", str(edge), flush=True)
+
+ if self.verbose:
+ print("Done.", flush=True)
+
+ return sg2
+
+class GraphRag:
+
+ def __init__(
+ self, prompt_client, embeddings_client, graph_embeddings_client,
+ triples_client, verbose=False,
+ ):
+
+ self.verbose = verbose
+
+ self.prompt_client = prompt_client
+ self.embeddings_client = embeddings_client
+ self.graph_embeddings_client = graph_embeddings_client
+ self.triples_client = triples_client
+
+ self.label_cache = {}
+
+ if self.verbose:
+ print("Initialised", flush=True)
+
+ async def query(
+ self, query, user = "trustgraph", collection = "default",
+ entity_limit = 50, triple_limit = 30, max_subgraph_size = 1000,
+ max_path_length = 2,
+ ):
+
+ if self.verbose:
+ print("Construct prompt...", flush=True)
+
+ q = Query(
+ rag = self, user = user, collection = collection,
+ verbose = self.verbose, entity_limit = entity_limit,
+ triple_limit = triple_limit,
+ max_subgraph_size = max_subgraph_size,
+ max_path_length = max_path_length,
+ )
+
+ kg = await q.get_labelgraph(query)
+
+ if self.verbose:
+ print("Invoke LLM...", flush=True)
+ print(kg)
+ print(query)
+
+ resp = await self.prompt_client.kg_prompt(query, kg)
+
+ if self.verbose:
+ print("Done", flush=True)
+
+ return resp
+
diff --git a/trustgraph-flow/trustgraph/retrieval/graph_rag/rag.py b/trustgraph-flow/trustgraph/retrieval/graph_rag/rag.py
index 2c45ecd4..5d3cc2f4 100755
--- a/trustgraph-flow/trustgraph/retrieval/graph_rag/rag.py
+++ b/trustgraph-flow/trustgraph/retrieval/graph_rag/rag.py
@@ -5,57 +5,18 @@ Input is query, output is response.
"""
from ... schema import GraphRagQuery, GraphRagResponse, Error
-from ... schema import graph_rag_request_queue, graph_rag_response_queue
-from ... schema import prompt_request_queue
-from ... schema import prompt_response_queue
-from ... schema import embeddings_request_queue
-from ... schema import embeddings_response_queue
-from ... schema import graph_embeddings_request_queue
-from ... schema import graph_embeddings_response_queue
-from ... schema import triples_request_queue
-from ... schema import triples_response_queue
-from ... log_level import LogLevel
-from ... graph_rag import GraphRag
-from ... base import ConsumerProducer
+from . graph_rag import GraphRag
+from ... base import FlowProcessor, ConsumerSpec, ProducerSpec
+from ... base import PromptClientSpec, EmbeddingsClientSpec
+from ... base import GraphEmbeddingsClientSpec, TriplesClientSpec
-module = ".".join(__name__.split(".")[1:-1])
+default_ident = "graph-rag"
-default_input_queue = graph_rag_request_queue
-default_output_queue = graph_rag_response_queue
-default_subscriber = module
-
-class Processor(ConsumerProducer):
+class Processor(FlowProcessor):
def __init__(self, **params):
- input_queue = params.get("input_queue", default_input_queue)
- output_queue = params.get("output_queue", default_output_queue)
- subscriber = params.get("subscriber", default_subscriber)
-
- pr_request_queue = params.get(
- "prompt_request_queue", prompt_request_queue
- )
- pr_response_queue = params.get(
- "prompt_response_queue", prompt_response_queue
- )
- emb_request_queue = params.get(
- "embeddings_request_queue", embeddings_request_queue
- )
- emb_response_queue = params.get(
- "embeddings_response_queue", embeddings_response_queue
- )
- ge_request_queue = params.get(
- "graph_embeddings_request_queue", graph_embeddings_request_queue
- )
- ge_response_queue = params.get(
- "graph_embeddings_response_queue", graph_embeddings_response_queue
- )
- tpl_request_queue = params.get(
- "triples_request_queue", triples_request_queue
- )
- tpl_response_queue = params.get(
- "triples_response_queue", triples_response_queue
- )
+ id = params.get("id", default_ident)
entity_limit = params.get("entity_limit", 50)
triple_limit = params.get("triple_limit", 30)
@@ -64,49 +25,74 @@ class Processor(ConsumerProducer):
super(Processor, self).__init__(
**params | {
- "input_queue": input_queue,
- "output_queue": output_queue,
- "subscriber": subscriber,
- "input_schema": GraphRagQuery,
- "output_schema": GraphRagResponse,
+ "id": id,
"entity_limit": entity_limit,
"triple_limit": triple_limit,
"max_subgraph_size": max_subgraph_size,
- "prompt_request_queue": pr_request_queue,
- "prompt_response_queue": pr_response_queue,
- "embeddings_request_queue": emb_request_queue,
- "embeddings_response_queue": emb_response_queue,
- "graph_embeddings_request_queue": ge_request_queue,
- "graph_embeddings_response_queue": ge_response_queue,
- "triples_request_queue": triples_request_queue,
- "triples_response_queue": triples_response_queue,
+ "max_path_length": max_path_length,
}
)
- self.rag = GraphRag(
- pulsar_host=self.pulsar_host,
- pulsar_api_key=self.pulsar_api_key,
- pr_request_queue=pr_request_queue,
- pr_response_queue=pr_response_queue,
- emb_request_queue=emb_request_queue,
- emb_response_queue=emb_response_queue,
- ge_request_queue=ge_request_queue,
- ge_response_queue=ge_response_queue,
- tpl_request_queue=triples_request_queue,
- tpl_response_queue=triples_response_queue,
- verbose=True,
- module=module,
- )
-
self.default_entity_limit = entity_limit
self.default_triple_limit = triple_limit
self.default_max_subgraph_size = max_subgraph_size
self.default_max_path_length = max_path_length
- async def handle(self, msg):
+ self.register_specification(
+ ConsumerSpec(
+ name = "request",
+ schema = GraphRagQuery,
+ handler = self.on_request,
+ )
+ )
+
+ self.register_specification(
+ EmbeddingsClientSpec(
+ request_name = "embeddings-request",
+ response_name = "embeddings-response",
+ )
+ )
+
+ self.register_specification(
+ GraphEmbeddingsClientSpec(
+ request_name = "graph-embeddings-request",
+ response_name = "graph-embeddings-response",
+ )
+ )
+
+ self.register_specification(
+ TriplesClientSpec(
+ request_name = "triples-request",
+ response_name = "triples-response",
+ )
+ )
+
+ self.register_specification(
+ PromptClientSpec(
+ request_name = "prompt-request",
+ response_name = "prompt-response",
+ )
+ )
+
+ self.register_specification(
+ ProducerSpec(
+ name = "response",
+ schema = GraphRagResponse,
+ )
+ )
+
+ async def on_request(self, msg, consumer, flow):
try:
+ self.rag = GraphRag(
+ embeddings_client = flow("embeddings-request"),
+ graph_embeddings_client = flow("graph-embeddings-request"),
+ triples_client = flow("triples-request"),
+ prompt_client = flow("prompt-request"),
+ verbose=True,
+ )
+
v = msg.value()
# Sender-produced ID
@@ -134,16 +120,20 @@ class Processor(ConsumerProducer):
else:
max_path_length = self.default_max_path_length
- response = self.rag.query(
- query=v.query, user=v.user, collection=v.collection,
- entity_limit=entity_limit, triple_limit=triple_limit,
- max_subgraph_size=max_subgraph_size,
- max_path_length=max_path_length,
+ response = await self.rag.query(
+ query = v.query, user = v.user, collection = v.collection,
+ entity_limit = entity_limit, triple_limit = triple_limit,
+ max_subgraph_size = max_subgraph_size,
+ max_path_length = max_path_length,
)
- print("Send response...", flush=True)
- r = GraphRagResponse(response=response, error=None)
- await self.send(r, properties={"id": id})
+ await flow("response").send(
+ GraphRagResponse(
+ response = response,
+ error = None
+ ),
+ properties = {"id": id}
+ )
print("Done.", flush=True)
@@ -153,25 +143,21 @@ class Processor(ConsumerProducer):
print("Send error response...", flush=True)
- r = GraphRagResponse(
- error=Error(
- type = "llm-error",
- message = str(e),
+ await flow("response").send(
+ GraphRagResponse(
+ response = None,
+ error = Error(
+ type = "graph-rag-error",
+ message = str(e),
+ ),
),
- response=None,
+ properties = {"id": id}
)
- await self.send(r, properties={"id": id})
-
- self.consumer.acknowledge(msg)
-
@staticmethod
def add_args(parser):
- ConsumerProducer.add_args(
- parser, default_input_queue, default_subscriber,
- default_output_queue,
- )
+ FlowProcessor.add_args(parser)
parser.add_argument(
'-e', '--entity-limit',
@@ -201,55 +187,7 @@ class Processor(ConsumerProducer):
help=f'Default max path length (default: 2)'
)
- parser.add_argument(
- '--prompt-request-queue',
- default=prompt_request_queue,
- help=f'Prompt request queue (default: {prompt_request_queue})',
- )
-
- parser.add_argument(
- '--prompt-response-queue',
- default=prompt_response_queue,
- help=f'Prompt response queue (default: {prompt_response_queue})',
- )
-
- parser.add_argument(
- '--embeddings-request-queue',
- default=embeddings_request_queue,
- help=f'Embeddings request queue (default: {embeddings_request_queue})',
- )
-
- parser.add_argument(
- '--embeddings-response-queue',
- default=embeddings_response_queue,
- help=f'Embeddings response queue (default: {embeddings_response_queue})',
- )
-
- parser.add_argument(
- '--graph-embeddings-request-queue',
- default=graph_embeddings_request_queue,
- help=f'Graph embeddings request queue (default: {graph_embeddings_request_queue})',
- )
-
- parser.add_argument(
- '--graph-embeddings-response-queue',
- default=graph_embeddings_response_queue,
- help=f'Graph embeddings response queue (default: {graph_embeddings_response_queue})',
- )
-
- parser.add_argument(
- '--triples-request-queue',
- default=triples_request_queue,
- help=f'Triples request queue (default: {triples_request_queue})',
- )
-
- parser.add_argument(
- '--triples-response-queue',
- default=triples_response_queue,
- help=f'Triples response queue (default: {triples_response_queue})',
- )
-
def run():
- Processor.launch(module, __doc__)
+ Processor.launch(default_ident, __doc__)
diff --git a/trustgraph-flow/trustgraph/storage/doc_embeddings/milvus/write.py b/trustgraph-flow/trustgraph/storage/doc_embeddings/milvus/write.py
index b4dbc486..2949263a 100755
--- a/trustgraph-flow/trustgraph/storage/doc_embeddings/milvus/write.py
+++ b/trustgraph-flow/trustgraph/storage/doc_embeddings/milvus/write.py
@@ -10,7 +10,7 @@ from .... schema import document_embeddings_store_queue
from .... log_level import LogLevel
from .... base import Consumer
-module = ".".join(__name__.split(".")[1:-1])
+module = "de-write"
default_input_queue = document_embeddings_store_queue
default_subscriber = module
diff --git a/trustgraph-flow/trustgraph/storage/doc_embeddings/pinecone/write.py b/trustgraph-flow/trustgraph/storage/doc_embeddings/pinecone/write.py
index 9e91db9a..128323aa 100644
--- a/trustgraph-flow/trustgraph/storage/doc_embeddings/pinecone/write.py
+++ b/trustgraph-flow/trustgraph/storage/doc_embeddings/pinecone/write.py
@@ -16,7 +16,7 @@ from .... schema import document_embeddings_store_queue
from .... log_level import LogLevel
from .... base import Consumer
-module = ".".join(__name__.split(".")[1:-1])
+module = "de-write"
default_input_queue = document_embeddings_store_queue
default_subscriber = module
diff --git a/trustgraph-flow/trustgraph/storage/doc_embeddings/qdrant/write.py b/trustgraph-flow/trustgraph/storage/doc_embeddings/qdrant/write.py
index 810c1931..d65a75eb 100644
--- a/trustgraph-flow/trustgraph/storage/doc_embeddings/qdrant/write.py
+++ b/trustgraph-flow/trustgraph/storage/doc_embeddings/qdrant/write.py
@@ -8,31 +8,21 @@ from qdrant_client.models import PointStruct
from qdrant_client.models import Distance, VectorParams
import uuid
-from .... schema import DocumentEmbeddings
-from .... schema import document_embeddings_store_queue
-from .... log_level import LogLevel
-from .... base import Consumer
+from .... base import DocumentEmbeddingsStoreService
-module = ".".join(__name__.split(".")[1:-1])
+default_ident = "de-write"
-default_input_queue = document_embeddings_store_queue
-default_subscriber = module
default_store_uri = 'http://localhost:6333'
-class Processor(Consumer):
+class Processor(DocumentEmbeddingsStoreService):
def __init__(self, **params):
- input_queue = params.get("input_queue", default_input_queue)
- subscriber = params.get("subscriber", default_subscriber)
store_uri = params.get("store_uri", default_store_uri)
api_key = params.get("api_key", None)
super(Processor, self).__init__(
**params | {
- "input_queue": input_queue,
- "subscriber": subscriber,
- "input_schema": DocumentEmbeddings,
"store_uri": store_uri,
"api_key": api_key,
}
@@ -40,13 +30,11 @@ class Processor(Consumer):
self.last_collection = None
- self.client = QdrantClient(url=store_uri)
+ self.qdrant = QdrantClient(url=store_uri, api_key=api_key)
- async def handle(self, msg):
+ async def store_document_embeddings(self, message):
- v = msg.value()
-
- for emb in v.chunks:
+ for emb in message.chunks:
chunk = emb.chunk.decode("utf-8")
if chunk == "": return
@@ -55,16 +43,17 @@ class Processor(Consumer):
dim = len(vec)
collection = (
- "d_" + v.metadata.user + "_" + v.metadata.collection + "_" +
+ "d_" + message.metadata.user + "_" +
+ message.metadata.collection + "_" +
str(dim)
)
if collection != self.last_collection:
- if not self.client.collection_exists(collection):
+ if not self.qdrant.collection_exists(collection):
try:
- self.client.create_collection(
+ self.qdrant.create_collection(
collection_name=collection,
vectors_config=VectorParams(
size=dim, distance=Distance.COSINE
@@ -76,7 +65,7 @@ class Processor(Consumer):
self.last_collection = collection
- self.client.upsert(
+ self.qdrant.upsert(
collection_name=collection,
points=[
PointStruct(
@@ -92,9 +81,7 @@ class Processor(Consumer):
@staticmethod
def add_args(parser):
- Consumer.add_args(
- parser, default_input_queue, default_subscriber,
- )
+ DocumentEmbeddingsStoreService.add_args(parser)
parser.add_argument(
'-t', '--store-uri',
@@ -110,5 +97,5 @@ class Processor(Consumer):
def run():
- Processor.launch(module, __doc__)
+ Processor.launch(default_ident, __doc__)
diff --git a/trustgraph-flow/trustgraph/storage/graph_embeddings/milvus/write.py b/trustgraph-flow/trustgraph/storage/graph_embeddings/milvus/write.py
index b2d40306..8d8b68b0 100755
--- a/trustgraph-flow/trustgraph/storage/graph_embeddings/milvus/write.py
+++ b/trustgraph-flow/trustgraph/storage/graph_embeddings/milvus/write.py
@@ -9,7 +9,7 @@ from .... log_level import LogLevel
from .... direct.milvus_graph_embeddings import EntityVectors
from .... base import Consumer
-module = ".".join(__name__.split(".")[1:-1])
+module = "ge-write"
default_input_queue = graph_embeddings_store_queue
default_subscriber = module
diff --git a/trustgraph-flow/trustgraph/storage/graph_embeddings/pinecone/write.py b/trustgraph-flow/trustgraph/storage/graph_embeddings/pinecone/write.py
index 83861b54..400acf26 100755
--- a/trustgraph-flow/trustgraph/storage/graph_embeddings/pinecone/write.py
+++ b/trustgraph-flow/trustgraph/storage/graph_embeddings/pinecone/write.py
@@ -15,7 +15,7 @@ from .... schema import graph_embeddings_store_queue
from .... log_level import LogLevel
from .... base import Consumer
-module = ".".join(__name__.split(".")[1:-1])
+module = "ge-write"
default_input_queue = graph_embeddings_store_queue
default_subscriber = module
diff --git a/trustgraph-flow/trustgraph/storage/graph_embeddings/qdrant/write.py b/trustgraph-flow/trustgraph/storage/graph_embeddings/qdrant/write.py
index 6b0d7371..ecefee4f 100755
--- a/trustgraph-flow/trustgraph/storage/graph_embeddings/qdrant/write.py
+++ b/trustgraph-flow/trustgraph/storage/graph_embeddings/qdrant/write.py
@@ -8,31 +8,21 @@ from qdrant_client.models import PointStruct
from qdrant_client.models import Distance, VectorParams
import uuid
-from .... schema import GraphEmbeddings
-from .... schema import graph_embeddings_store_queue
-from .... log_level import LogLevel
-from .... base import Consumer
+from .... base import GraphEmbeddingsStoreService
-module = ".".join(__name__.split(".")[1:-1])
+default_ident = "ge-write"
-default_input_queue = graph_embeddings_store_queue
-default_subscriber = module
default_store_uri = 'http://localhost:6333'
-class Processor(Consumer):
+class Processor(GraphEmbeddingsStoreService):
def __init__(self, **params):
- input_queue = params.get("input_queue", default_input_queue)
- subscriber = params.get("subscriber", default_subscriber)
store_uri = params.get("store_uri", default_store_uri)
api_key = params.get("api_key", None)
super(Processor, self).__init__(
**params | {
- "input_queue": input_queue,
- "subscriber": subscriber,
- "input_schema": GraphEmbeddings,
"store_uri": store_uri,
"api_key": api_key,
}
@@ -40,7 +30,7 @@ class Processor(Consumer):
self.last_collection = None
- self.client = QdrantClient(url=store_uri, api_key=api_key)
+ self.qdrant = QdrantClient(url=store_uri, api_key=api_key)
def get_collection(self, dim, user, collection):
@@ -50,10 +40,10 @@ class Processor(Consumer):
if cname != self.last_collection:
- if not self.client.collection_exists(cname):
+ if not self.qdrant.collection_exists(cname):
try:
- self.client.create_collection(
+ self.qdrant.create_collection(
collection_name=cname,
vectors_config=VectorParams(
size=dim, distance=Distance.COSINE
@@ -67,11 +57,9 @@ class Processor(Consumer):
return cname
- async def handle(self, msg):
+ async def store_graph_embeddings(self, message):
- v = msg.value()
-
- for entity in v.entities:
+ for entity in message.entities:
if entity.entity.value == "" or entity.entity.value is None: return
@@ -80,10 +68,10 @@ class Processor(Consumer):
dim = len(vec)
collection = self.get_collection(
- dim, v.metadata.user, v.metadata.collection
+ dim, message.metadata.user, message.metadata.collection
)
- self.client.upsert(
+ self.qdrant.upsert(
collection_name=collection,
points=[
PointStruct(
@@ -99,9 +87,7 @@ class Processor(Consumer):
@staticmethod
def add_args(parser):
- Consumer.add_args(
- parser, default_input_queue, default_subscriber,
- )
+ GraphEmbeddingsStoreService.add_args(parser)
parser.add_argument(
'-t', '--store-uri',
@@ -117,5 +103,5 @@ class Processor(Consumer):
def run():
- Processor.launch(module, __doc__)
+ Processor.launch(default_ident, __doc__)
diff --git a/trustgraph-flow/trustgraph/storage/knowledge/__init__.py b/trustgraph-flow/trustgraph/storage/knowledge/__init__.py
new file mode 100644
index 00000000..ff60c5fa
--- /dev/null
+++ b/trustgraph-flow/trustgraph/storage/knowledge/__init__.py
@@ -0,0 +1,3 @@
+
+from . store import run
+
diff --git a/trustgraph-flow/trustgraph/storage/knowledge/__main__.py b/trustgraph-flow/trustgraph/storage/knowledge/__main__.py
new file mode 100644
index 00000000..92825a02
--- /dev/null
+++ b/trustgraph-flow/trustgraph/storage/knowledge/__main__.py
@@ -0,0 +1,5 @@
+
+from . store import run
+
+if __name__ == '__main__':
+ run()
diff --git a/trustgraph-flow/trustgraph/storage/knowledge/store.py b/trustgraph-flow/trustgraph/storage/knowledge/store.py
new file mode 100644
index 00000000..62e915be
--- /dev/null
+++ b/trustgraph-flow/trustgraph/storage/knowledge/store.py
@@ -0,0 +1,78 @@
+
+"""
+Stores knowledge-cores in Cassandra
+"""
+
+import json
+import urllib.parse
+
+from ... schema import Triples, GraphEmbeddings
+from ... base import FlowProcessor, ConsumerSpec
+
+from ... tables.knowledge import KnowledgeTableStore
+
+default_ident = "kg-store"
+
+default_cassandra_host = "cassandra"
+keyspace = "knowledge"
+
+class Processor(FlowProcessor):
+
+ def __init__(self, **params):
+
+ id = params.get("id")
+
+ cassandra_host = params.get("cassandra_host", default_cassandra_host)
+ cassandra_user = params.get("cassandra_user")
+ cassandra_password = params.get("cassandra_password")
+
+ super(Processor, self).__init__(
+ **params | {
+ "id": id,
+ "cassandra_host": cassandra_host,
+ "cassandra_user": cassandra_user,
+ }
+ )
+
+ self.register_specification(
+ ConsumerSpec(
+ name = "triples-input",
+ schema = Triples,
+ handler = self.on_triples
+ )
+ )
+
+ self.register_specification(
+ ConsumerSpec(
+ name = "graph-embeddings-input",
+ schema = GraphEmbeddings,
+ handler = self.on_graph_embeddings
+ )
+ )
+
+ self.table_store = KnowledgeTableStore(
+ cassandra_host = cassandra_host.split(","),
+ cassandra_user = cassandra_user,
+ cassandra_password = cassandra_password,
+ keyspace = keyspace,
+ )
+
+ async def on_triples(self, msg, consumer, flow):
+
+ v = msg.value()
+ await self.table_store.add_triples(v)
+
+ async def on_graph_embeddings(self, msg, consumer, flow):
+
+ v = msg.value()
+ await self.table_store.add_graph_embeddings(v)
+
+ @staticmethod
+ def add_args(parser):
+
+ FlowProcessor.add_args(parser)
+
+def run():
+
+ Processor.launch(default_ident, __doc__)
+
diff --git a/trustgraph-flow/trustgraph/storage/object_embeddings/milvus/write.py b/trustgraph-flow/trustgraph/storage/object_embeddings/milvus/write.py
index 5490af97..d1ad139a 100755
--- a/trustgraph-flow/trustgraph/storage/object_embeddings/milvus/write.py
+++ b/trustgraph-flow/trustgraph/storage/object_embeddings/milvus/write.py
@@ -9,7 +9,7 @@ from .... log_level import LogLevel
from .... direct.milvus_object_embeddings import ObjectVectors
from .... base import Consumer
-module = ".".join(__name__.split(".")[1:-1])
+module = "oe-write"
default_input_queue = object_embeddings_store_queue
default_subscriber = module
diff --git a/trustgraph-flow/trustgraph/storage/rows/cassandra/write.py b/trustgraph-flow/trustgraph/storage/rows/cassandra/write.py
index e6536e6c..a84aefde 100755
--- a/trustgraph-flow/trustgraph/storage/rows/cassandra/write.py
+++ b/trustgraph-flow/trustgraph/storage/rows/cassandra/write.py
@@ -17,7 +17,7 @@ from .... schema import rows_store_queue
from .... log_level import LogLevel
from .... base import Consumer
-module = ".".join(__name__.split(".")[1:-1])
+module = "rows-write"
ssl_context = SSLContext(PROTOCOL_TLSv1_2)
default_input_queue = rows_store_queue
diff --git a/trustgraph-flow/trustgraph/storage/triples/cassandra/write.py b/trustgraph-flow/trustgraph/storage/triples/cassandra/write.py
index 17b5ae9a..f8396692 100755
--- a/trustgraph-flow/trustgraph/storage/triples/cassandra/write.py
+++ b/trustgraph-flow/trustgraph/storage/triples/cassandra/write.py
@@ -10,35 +10,26 @@ import argparse
import time
from .... direct.cassandra import TrustGraph
-from .... schema import Triples
-from .... schema import triples_store_queue
-from .... log_level import LogLevel
-from .... base import Consumer
+from .... base import TriplesStoreService
-module = ".".join(__name__.split(".")[1:-1])
+default_ident = "triples-write"
-default_input_queue = triples_store_queue
-default_subscriber = module
default_graph_host='localhost'
-class Processor(Consumer):
+class Processor(TriplesStoreService):
def __init__(self, **params):
- input_queue = params.get("input_queue", default_input_queue)
- subscriber = params.get("subscriber", default_subscriber)
+ id = params.get("id", default_ident)
+
graph_host = params.get("graph_host", default_graph_host)
graph_username = params.get("graph_username", None)
graph_password = params.get("graph_password", None)
super(Processor, self).__init__(
**params | {
- "input_queue": input_queue,
- "subscriber": subscriber,
- "input_schema": Triples,
"graph_host": graph_host,
- "graph_username": graph_username,
- "graph_password": graph_password,
+ "graph_username": graph_username
}
)
@@ -47,11 +38,9 @@ class Processor(Consumer):
self.password = graph_password
self.table = None
- async def handle(self, msg):
+ async def store_triples(self, message):
- v = msg.value()
-
- table = (v.metadata.user, v.metadata.collection)
+ table = (message.metadata.user, message.metadata.collection)
if self.table is None or self.table != table:
@@ -61,13 +50,15 @@ class Processor(Consumer):
if self.username and self.password:
self.tg = TrustGraph(
hosts=self.graph_host,
- keyspace=v.metadata.user, table=v.metadata.collection,
+ keyspace=message.metadata.user,
+ table=message.metadata.collection,
username=self.username, password=self.password
)
else:
self.tg = TrustGraph(
hosts=self.graph_host,
- keyspace=v.metadata.user, table=v.metadata.collection,
+ keyspace=message.metadata.user,
+ table=message.metadata.collection,
)
except Exception as e:
print("Exception", e, flush=True)
@@ -76,7 +67,7 @@ class Processor(Consumer):
self.table = table
- for t in v.triples:
+ for t in message.triples:
self.tg.insert(
t.s.value,
t.p.value,
@@ -86,9 +77,7 @@ class Processor(Consumer):
@staticmethod
def add_args(parser):
- Consumer.add_args(
- parser, default_input_queue, default_subscriber,
- )
+ TriplesStoreService.add_args(parser)
parser.add_argument(
'-g', '--graph-host',
@@ -110,5 +99,5 @@ class Processor(Consumer):
def run():
- Processor.launch(module, __doc__)
+ Processor.launch(default_ident, __doc__)
diff --git a/trustgraph-flow/trustgraph/storage/triples/falkordb/write.py b/trustgraph-flow/trustgraph/storage/triples/falkordb/write.py
index 2d0ae38a..b3996b91 100755
--- a/trustgraph-flow/trustgraph/storage/triples/falkordb/write.py
+++ b/trustgraph-flow/trustgraph/storage/triples/falkordb/write.py
@@ -16,7 +16,7 @@ from .... schema import triples_store_queue
from .... log_level import LogLevel
from .... base import Consumer
-module = ".".join(__name__.split(".")[1:-1])
+module = "triples-write"
default_input_queue = triples_store_queue
default_subscriber = module
diff --git a/trustgraph-flow/trustgraph/storage/triples/memgraph/write.py b/trustgraph-flow/trustgraph/storage/triples/memgraph/write.py
index 620e669e..8c88ea8f 100755
--- a/trustgraph-flow/trustgraph/storage/triples/memgraph/write.py
+++ b/trustgraph-flow/trustgraph/storage/triples/memgraph/write.py
@@ -16,7 +16,7 @@ from .... schema import triples_store_queue
from .... log_level import LogLevel
from .... base import Consumer
-module = ".".join(__name__.split(".")[1:-1])
+module = "triples-write"
default_input_queue = triples_store_queue
default_subscriber = module
diff --git a/trustgraph-flow/trustgraph/storage/triples/neo4j/write.py b/trustgraph-flow/trustgraph/storage/triples/neo4j/write.py
index 3323f912..84a4d923 100755
--- a/trustgraph-flow/trustgraph/storage/triples/neo4j/write.py
+++ b/trustgraph-flow/trustgraph/storage/triples/neo4j/write.py
@@ -16,7 +16,7 @@ from .... schema import triples_store_queue
from .... log_level import LogLevel
from .... base import Consumer
-module = ".".join(__name__.split(".")[1:-1])
+module = "triples-write"
default_input_queue = triples_store_queue
default_subscriber = module
diff --git a/trustgraph-flow/trustgraph/tables/__init__.py b/trustgraph-flow/trustgraph/tables/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/trustgraph-flow/trustgraph/tables/config.py b/trustgraph-flow/trustgraph/tables/config.py
new file mode 100644
index 00000000..45dfc4d9
--- /dev/null
+++ b/trustgraph-flow/trustgraph/tables/config.py
@@ -0,0 +1,309 @@
+
+from .. schema import KnowledgeResponse, Triple, Triples, EntityEmbeddings
+from .. schema import Metadata, Value, GraphEmbeddings
+
+from cassandra.cluster import Cluster
+from cassandra.auth import PlainTextAuthProvider
+from ssl import SSLContext, PROTOCOL_TLSv1_2
+
+import uuid
+import time
+import asyncio
+
+class ConfigTableStore:
+
+    def __init__(
+            self,
+            cassandra_host, cassandra_user, cassandra_password, keyspace,
+    ):
+        """Connect to Cassandra and get the config store ready for use.
+
+        ``cassandra_host`` is passed to ``Cluster`` unchanged.  When both
+        ``cassandra_user`` and ``cassandra_password`` are truthy the
+        connection uses plain-text authentication together with a
+        TLS 1.2 context; otherwise neither is configured.  Once
+        connected, the schema is ensured and the statements prepared.
+        """
+
+        self.keyspace = keyspace
+
+        print("Connecting to Cassandra...", flush=True)
+
+        # Credentials and TLS are switched on together or not at all
+        cluster_options = {}
+
+        if cassandra_user and cassandra_password:
+            cluster_options["ssl_context"] = SSLContext(PROTOCOL_TLSv1_2)
+            cluster_options["auth_provider"] = PlainTextAuthProvider(
+                username=cassandra_user, password=cassandra_password
+            )
+
+        self.cluster = Cluster(cassandra_host, **cluster_options)
+        self.cassandra = self.cluster.connect()
+
+        print("Connected.", flush=True)
+
+        self.ensure_cassandra_schema()
+        self.prepare_statements()
+
+    def ensure_cassandra_schema(self):
+        """Create the keyspace and tables used by the config store.
+
+        The keyspace and both tables are created with ``IF NOT EXISTS``,
+        so running this on every start-up is harmless.  The session is
+        switched to the keyspace, and zero is added to the 'version'
+        counter so that its row is present afterwards.
+        """
+
+        print("Ensure Cassandra schema...", flush=True)
+
+        print("Keyspace...", flush=True)
+
+        # FIXME: Replication factor should be configurable
+        # The keyspace name is interpolated directly into the statement,
+        # so it must come from trusted configuration.
+        self.cassandra.execute(f"""
+            create keyspace if not exists {self.keyspace}
+                with replication = {{
+                   'class' : 'SimpleStrategy',
+                   'replication_factor' : 1
+                }};
+        """)
+
+        self.cassandra.set_keyspace(self.keyspace)
+
+        print("config table...", flush=True)
+
+        self.cassandra.execute("""
+            CREATE TABLE IF NOT EXISTS config (
+                class text,
+                key text,
+                value text,
+                PRIMARY KEY (class, key)
+            );
+        """)
+
+        print("version table...", flush=True)
+
+        self.cassandra.execute("""
+            CREATE TABLE IF NOT EXISTS version (
+                id text,
+                version counter,
+                PRIMARY KEY (id)
+            );
+        """)
+
+        print("ensure version...", flush=True)
+
+        # Adding zero leaves an existing count untouched
+        self.cassandra.execute("""
+            UPDATE version set version = version + 0
+            WHERE id = 'version'
+        """)
+
+        print("Cassandra schema OK.", flush=True)
+
+    async def inc_version(self):
+        """Add one to the 'version' counter row."""
+
+        bump_cql = """
+            UPDATE version set version = version + 1
+            WHERE id = 'version'
+        """
+
+        self.cassandra.execute(bump_cql)
+
+    async def get_version(self):
+        """Return the current 'version' counter, or None if no row exists."""
+
+        query = """
+            SELECT version FROM version
+            WHERE id = 'version'
+        """
+
+        first = self.cassandra.execute(query).one()
+
+        return first[0] if first else None
+
+    def prepare_statements(self):
+        """Prepare every CQL statement used against the config table.
+
+        Each prepared statement is kept on the instance under a
+        ``*_stmt`` attribute for the query methods to execute.
+        """
+
+        prepare = self.cassandra.prepare
+
+        # Write a single value
+        self.put_config_stmt = prepare("""
+            INSERT INTO config ( class, key, value )
+            VALUES (?, ?, ?)
+        """)
+
+        # Enumerate the classes that have at least one key
+        self.get_classes_stmt = prepare("""
+            SELECT DISTINCT class FROM config;
+        """)
+
+        # Keys within one class
+        self.get_keys_stmt = prepare("""
+            SELECT key FROM config WHERE class = ?;
+        """)
+
+        # One value by class and key
+        self.get_value_stmt = prepare("""
+            SELECT value FROM config WHERE class = ? AND key = ?;
+        """)
+
+        # Remove one key
+        self.delete_key_stmt = prepare("""
+            DELETE FROM config
+            WHERE class = ? AND key = ?;
+        """)
+
+        # Whole-table dump
+        self.get_all_stmt = prepare("""
+            SELECT class, key, value FROM config;
+        """)
+
+        # Key/value pairs within one class
+        self.get_values_stmt = prepare("""
+            SELECT key, value FROM config WHERE class = ?;
+        """)
+
+    async def put_config(self, cls, key, value):
+        """Write ``value`` into the config table under (``cls``, ``key``).
+
+        Any exception from the driver is logged by type and re-raised;
+        no retry is attempted.
+        """
+
+        try:
+            self.cassandra.execute(
+                self.put_config_stmt,
+                (cls, key, value)
+            )
+        except Exception as e:
+            # Report the failure type, then let the caller handle it
+            print("Exception:", type(e))
+            raise
+
+    async def get_value(self, cls, key):
+        """Return the value stored under (``cls``, ``key``), or None.
+
+        Any exception from the driver is logged by type and re-raised;
+        no retry is attempted.
+        """
+
+        try:
+            resp = self.cassandra.execute(
+                self.get_value_stmt,
+                (cls, key)
+            )
+        except Exception as e:
+            # Report the failure type, then let the caller handle it
+            print("Exception:", type(e))
+            raise
+
+        # Only the first matching row is used
+        for row in resp:
+            return row[0]
+
+        return None
+
+    async def get_values(self, cls):
+        """Return every ``[key, value]`` pair stored under class ``cls``.
+
+        Any exception from the driver is logged by type and re-raised;
+        no retry is attempted.
+        """
+
+        try:
+            resp = self.cassandra.execute(
+                self.get_values_stmt,
+                (cls,)
+            )
+        except Exception as e:
+            # Report the failure type, then let the caller handle it
+            print("Exception:", type(e))
+            raise
+
+        return [
+            [row[0], row[1]]
+            for row in resp
+        ]
+
+    async def get_classes(self):
+        """Return the distinct class names present in the config table.
+
+        Any exception from the driver is logged by type and re-raised;
+        no retry is attempted.
+        """
+
+        try:
+            resp = self.cassandra.execute(
+                self.get_classes_stmt,
+                ()
+            )
+        except Exception as e:
+            # Report the failure type, then let the caller handle it
+            print("Exception:", type(e))
+            raise
+
+        return [row[0] for row in resp]
+
+    async def get_all(self):
+        """Return every config row as a ``(class, key, value)`` tuple.
+
+        Any exception from the driver is logged by type and re-raised;
+        no retry is attempted.
+        """
+
+        try:
+            resp = self.cassandra.execute(
+                self.get_all_stmt,
+                ()
+            )
+        except Exception as e:
+            # Report the failure type, then let the caller handle it
+            print("Exception:", type(e))
+            raise
+
+        return [
+            (row[0], row[1], row[2])
+            for row in resp
+        ]
+
+    async def get_keys(self, cls):
+        """Return the keys stored under class ``cls``.
+
+        Any exception from the driver is logged by type and re-raised;
+        no retry is attempted.
+        """
+
+        try:
+            resp = self.cassandra.execute(
+                self.get_keys_stmt,
+                (cls,)
+            )
+        except Exception as e:
+            # Report the failure type, then let the caller handle it
+            print("Exception:", type(e))
+            raise
+
+        return [row[0] for row in resp]
+
+    async def delete_key(self, cls, key):
+        """Delete the config row stored under (``cls``, ``key``).
+
+        Any exception from the driver is logged by type and re-raised;
+        no retry is attempted.
+        """
+
+        try:
+            self.cassandra.execute(
+                self.delete_key_stmt,
+                (cls, key)
+            )
+        except Exception as e:
+            # Report the failure type, then let the caller handle it
+            print("Exception:", type(e))
+            raise
+
diff --git a/trustgraph-flow/trustgraph/librarian/table_store.py b/trustgraph-flow/trustgraph/tables/knowledge.py
similarity index 53%
rename from trustgraph-flow/trustgraph/librarian/table_store.py
rename to trustgraph-flow/trustgraph/tables/knowledge.py
index 1fe47fcf..36414dc4 100644
--- a/trustgraph-flow/trustgraph/librarian/table_store.py
+++ b/trustgraph-flow/trustgraph/tables/knowledge.py
@@ -1,16 +1,16 @@
-from .. schema import LibrarianRequest, LibrarianResponse
-from .. schema import DocumentInfo, Error, Triple, Value
-from .. knowledge import hash
-from .. exceptions import RequestError
+
+from .. schema import KnowledgeResponse, Triple, Triples, EntityEmbeddings
+from .. schema import Metadata, Value, GraphEmbeddings
from cassandra.cluster import Cluster
from cassandra.auth import PlainTextAuthProvider
-from cassandra.query import BatchStatement
from ssl import SSLContext, PROTOCOL_TLSv1_2
+
import uuid
import time
+import asyncio
-class TableStore:
+class KnowledgeTableStore:
def __init__(
self,
@@ -59,38 +59,11 @@ class TableStore:
self.cassandra.set_keyspace(self.keyspace)
- print("document table...", flush=True)
-
- self.cassandra.execute("""
- CREATE TABLE IF NOT EXISTS document (
- user text,
- collection text,
- id text,
- time timestamp,
- title text,
- comments text,
- kind text,
- object_id uuid,
- metadata list>,
- PRIMARY KEY (user, collection, id)
- );
- """);
-
- print("object index...", flush=True)
-
- self.cassandra.execute("""
- CREATE INDEX IF NOT EXISTS document_object
- ON document (object_id)
- """);
-
print("triples table...", flush=True)
self.cassandra.execute("""
CREATE TABLE IF NOT EXISTS triples (
user text,
- collection text,
document_id text,
id uuid,
time timestamp,
@@ -100,7 +73,7 @@ class TableStore:
triples list>,
- PRIMARY KEY (user, collection, document_id, id)
+ PRIMARY KEY ((user, document_id), id)
);
""");
@@ -109,7 +82,6 @@ class TableStore:
self.cassandra.execute("""
create table if not exists graph_embeddings (
user text,
- collection text,
document_id text,
id uuid,
time timestamp,
@@ -122,16 +94,20 @@ class TableStore:
list>
>
>,
- PRIMARY KEY (user, collection, document_id, id)
+ PRIMARY KEY ((user, document_id), id)
);
""");
+ self.cassandra.execute("""
+ CREATE INDEX IF NOT EXISTS graph_embeddings_user ON
+ graph_embeddings ( user );
+ """);
+
print("document_embeddings table...", flush=True)
self.cassandra.execute("""
create table if not exists document_embeddings (
user text,
- collection text,
document_id text,
id uuid,
time timestamp,
@@ -144,109 +120,78 @@ class TableStore:
list>
>
>,
- PRIMARY KEY (user, collection, document_id, id)
+ PRIMARY KEY ((user, document_id), id)
);
""");
+ self.cassandra.execute("""
+ CREATE INDEX IF NOT EXISTS document_embeddings_user ON
+ document_embeddings ( user );
+ """);
+
print("Cassandra schema OK.", flush=True)
def prepare_statements(self):
- self.insert_document_stmt = self.cassandra.prepare("""
- INSERT INTO document
- (
- id, user, collection, kind, object_id, time, title, comments,
- metadata
- )
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
- """)
-
- self.list_document_stmt = self.cassandra.prepare("""
- SELECT
- id, kind, user, collection, title, comments, time, metadata
- FROM document
- WHERE user = ?
- """)
-
- self.list_document_by_collection_stmt = self.cassandra.prepare("""
- SELECT
- id, kind, user, collection, title, comments, time, metadata
- FROM document
- WHERE user = ? AND collection = ?
- """)
-
self.insert_triples_stmt = self.cassandra.prepare("""
INSERT INTO triples
(
- id, user, collection, document_id, time,
- metadata, triples
+ id, user, document_id,
+ time, metadata, triples
)
- VALUES (?, ?, ?, ?, ?, ?, ?)
+ VALUES (?, ?, ?, ?, ?, ?)
""")
self.insert_graph_embeddings_stmt = self.cassandra.prepare("""
INSERT INTO graph_embeddings
(
- id, user, collection, document_id, time,
- metadata, entity_embeddings
+ id, user, document_id, time, metadata, entity_embeddings
)
- VALUES (?, ?, ?, ?, ?, ?, ?)
+ VALUES (?, ?, ?, ?, ?, ?)
""")
self.insert_document_embeddings_stmt = self.cassandra.prepare("""
INSERT INTO document_embeddings
(
- id, user, collection, document_id, time,
- metadata, chunks
+ id, user, document_id, time, metadata, chunks
)
- VALUES (?, ?, ?, ?, ?, ?, ?)
+ VALUES (?, ?, ?, ?, ?, ?)
""")
- def add(self, object_id, document):
+ self.list_cores_stmt = self.cassandra.prepare("""
+ SELECT DISTINCT user, document_id FROM graph_embeddings
+ WHERE user = ?
+ """)
- if document.kind not in (
- "text/plain", "application/pdf"
- ):
- raise RequestError("Invalid document kind: " + document.kind)
+ self.get_triples_stmt = self.cassandra.prepare("""
+ SELECT id, time, metadata, triples
+ FROM triples
+ WHERE user = ? AND document_id = ?
+ """)
- # Create random doc ID
- when = int(time.time() * 1000)
+ self.get_graph_embeddings_stmt = self.cassandra.prepare("""
+ SELECT id, time, metadata, entity_embeddings
+ FROM graph_embeddings
+ WHERE user = ? AND document_id = ?
+ """)
- print("Adding", document.id, object_id)
+ self.get_document_embeddings_stmt = self.cassandra.prepare("""
+ SELECT id, time, metadata, chunks
+ FROM document_embeddings
+ WHERE user = ? AND document_id = ?
+ """)
- metadata = [
- (
- v.s.value, v.s.is_uri, v.p.value, v.p.is_uri,
- v.o.value, v.o.is_uri
- )
- for v in document.metadata
- ]
+ self.delete_triples_stmt = self.cassandra.prepare("""
+ DELETE FROM triples
+ WHERE user = ? AND document_id = ?
+ """)
- while True:
+ self.delete_graph_embeddings_stmt = self.cassandra.prepare("""
+ DELETE FROM graph_embeddings
+ WHERE user = ? AND document_id = ?
+ """)
- try:
-
- resp = self.cassandra.execute(
- self.insert_document_stmt,
- (
- document.id, document.user, document.collection,
- document.kind, object_id, when,
- document.title, document.comments,
- metadata
- )
- )
-
- break
-
- except Exception as e:
-
- print("Exception:", type(e))
- print(f"{e}, retry...", flush=True)
- time.sleep(1)
-
- print("Add complete", flush=True)
-
- def add_triples(self, m):
+ async def add_triples(self, m):
when = int(time.time() * 1000)
@@ -277,7 +222,7 @@ class TableStore:
self.insert_triples_stmt,
(
uuid.uuid4(), m.metadata.user,
- m.metadata.collection, m.metadata.id, when,
+ m.metadata.id, when,
metadata, triples,
)
)
@@ -287,77 +232,11 @@ class TableStore:
except Exception as e:
print("Exception:", type(e))
+ raise e
print(f"{e}, retry...", flush=True)
- time.sleep(1)
+ await asyncio.sleep(1)
- def list(self, user, collection=None):
-
- print("LIST")
- while True:
-
- print("TRY")
-
- print(self.list_document_stmt)
- try:
-
- if collection:
- resp = self.cassandra.execute(
- self.list_document_by_collection_stmt,
- (user, collection)
- )
- else:
- resp = self.cassandra.execute(
- self.list_document_stmt,
- (user,)
- )
- break
-
- print("OK")
-
- except Exception as e:
- print("Exception:", type(e))
- print(f"{e}, retry...", flush=True)
- time.sleep(1)
-
- print("OK2")
-
- info = [
- DocumentInfo(
- id = row[0],
- kind = row[1],
- user = row[2],
- collection = row[3],
- title = row[4],
- comments = row[5],
- time = int(1000 * row[6].timestamp()),
- metadata = [
- Triple(
- s=Value(value=m[0], is_uri=m[1]),
- p=Value(value=m[2], is_uri=m[3]),
- o=Value(value=m[4], is_uri=m[5])
- )
- for m in row[7]
- ],
- )
- for row in resp
- ]
-
- print("OK3")
-
- print(info[0])
-
- print(info[0].user)
- print(info[0].time)
- print(info[0].kind)
- print(info[0].collection)
- print(info[0].title)
- print(info[0].comments)
- print(info[0].metadata)
- print(info[0].metadata)
-
- return info
-
- def add_graph_embeddings(self, m):
+ async def add_graph_embeddings(self, m):
when = int(time.time() * 1000)
@@ -388,7 +267,7 @@ class TableStore:
self.insert_graph_embeddings_stmt,
(
uuid.uuid4(), m.metadata.user,
- m.metadata.collection, m.metadata.id, when,
+ m.metadata.id, when,
metadata, entities,
)
)
@@ -398,10 +277,11 @@ class TableStore:
except Exception as e:
print("Exception:", type(e))
+ raise e
print(f"{e}, retry...", flush=True)
- time.sleep(1)
+ await asyncio.sleep(1)
- def add_document_embeddings(self, m):
+ async def add_document_embeddings(self, m):
when = int(time.time() * 1000)
@@ -432,7 +312,7 @@ class TableStore:
self.insert_document_embeddings_stmt,
(
uuid.uuid4(), m.metadata.user,
- m.metadata.collection, m.metadata.id, when,
+ m.metadata.id, when,
metadata, chunks,
)
)
@@ -442,7 +322,191 @@ class TableStore:
except Exception as e:
print("Exception:", type(e))
+ raise e
print(f"{e}, retry...", flush=True)
- time.sleep(1)
+ await asyncio.sleep(1)
+
+    async def list_kg_cores(self, user):
+        """Return the document IDs that have graph embeddings for ``user``.
+
+        Any exception from the driver is logged by type and re-raised;
+        no retry is attempted.
+        """
+
+        print("List kg cores...")
+
+        try:
+            resp = self.cassandra.execute(
+                self.list_cores_stmt,
+                (user,)
+            )
+        except Exception as e:
+            # Report the failure type, then let the caller handle it
+            print("Exception:", type(e))
+            raise
+
+        # Rows are (user, document_id); only the document ID is returned
+        lst = [row[1] for row in resp]
+
+        print("Done")
+
+        return lst
+
+    async def delete_kg_core(self, user, document_id):
+        """Delete the stored triples and graph embeddings of one document.
+
+        The triples are removed first, then the graph embeddings.  Any
+        exception from the driver is logged by type and re-raised, so a
+        failure of the first delete prevents the second from running.
+        """
+
+        print("Delete kg cores...")
+
+        # NOTE(review): document_embeddings rows are not deleted here;
+        # confirm whether that is intentional.
+        deletions = (
+            self.delete_triples_stmt,
+            self.delete_graph_embeddings_stmt,
+        )
+
+        for stmt in deletions:
+            try:
+                self.cassandra.execute(stmt, (user, document_id))
+            except Exception as e:
+                # Report the failure type, then let the caller handle it
+                print("Exception:", type(e))
+                raise
+
+    async def get_triples(self, user, document_id, receiver):
+        """Send the stored triples of one document to ``receiver``.
+
+        One ``Triples`` message is built per stored row and awaited
+        through ``receiver`` before the next row is processed.  Any
+        exception from the driver is logged by type and re-raised; no
+        retry is attempted.
+        """
+
+        def to_triples(elts):
+            # Stored tuples are (s, s_is_uri, p, p_is_uri, o, o_is_uri).
+            # A missing (null) collection is treated as empty.
+            return [
+                Triple(
+                    s = Value(value = elt[0], is_uri = elt[1]),
+                    p = Value(value = elt[2], is_uri = elt[3]),
+                    o = Value(value = elt[4], is_uri = elt[5]),
+                )
+                for elt in elts or []
+            ]
+
+        print("Get triples...")
+
+        try:
+            resp = self.cassandra.execute(
+                self.get_triples_stmt,
+                (user, document_id)
+            )
+        except Exception as e:
+            # Report the failure type, then let the caller handle it
+            print("Exception:", type(e))
+            raise
+
+        # Row columns: id, time, metadata, triples
+        for row in resp:
+
+            await receiver(
+                Triples(
+                    metadata = Metadata(
+                        id = document_id,
+                        user = user,
+                        collection = "default",  # FIXME: What to put here?
+                        metadata = to_triples(row[2]),
+                    ),
+                    triples = to_triples(row[3])
+                )
+            )
+
+        print("Done")
+
+    async def get_graph_embeddings(self, user, document_id, receiver):
+        """Send the stored graph embeddings of one document to ``receiver``.
+
+        One ``GraphEmbeddings`` message is built per stored row and
+        awaited through ``receiver`` before the next row is processed.
+        Any exception from the driver is logged by type and re-raised;
+        no retry is attempted.
+        """
+
+        print("Get GE...")
+
+        try:
+            resp = self.cassandra.execute(
+                self.get_graph_embeddings_stmt,
+                (user, document_id)
+            )
+        except Exception as e:
+            # Report the failure type, then let the caller handle it
+            print("Exception:", type(e))
+            raise
+
+        # Row columns: id, time, metadata, entity_embeddings
+        for row in resp:
+
+            # Metadata tuples are (s, s_is_uri, p, p_is_uri, o, o_is_uri);
+            # a missing (null) collection is treated as empty
+            metadata = [
+                Triple(
+                    s = Value(value = elt[0], is_uri = elt[1]),
+                    p = Value(value = elt[2], is_uri = elt[3]),
+                    o = Value(value = elt[4], is_uri = elt[5]),
+                )
+                for elt in row[2] or []
+            ]
+
+            # Each stored entity is ((value, is_uri), vectors)
+            entities = [
+                EntityEmbeddings(
+                    entity = Value(value = ent[0][0], is_uri = ent[0][1]),
+                    vectors = ent[1]
+                )
+                for ent in row[3] or []
+            ]
+
+            await receiver(
+                GraphEmbeddings(
+                    metadata = Metadata(
+                        id = document_id,
+                        user = user,
+                        collection = "default",  # FIXME: What to put here?
+                        metadata = metadata,
+                    ),
+                    entities = entities
+                )
+            )
+
+        print("Done")
-
diff --git a/trustgraph-flow/trustgraph/tables/library.py b/trustgraph-flow/trustgraph/tables/library.py
new file mode 100644
index 00000000..c8cdb027
--- /dev/null
+++ b/trustgraph-flow/trustgraph/tables/library.py
@@ -0,0 +1,534 @@
+
+from .. schema import LibrarianRequest, LibrarianResponse
+from .. schema import DocumentMetadata, ProcessingMetadata
+from .. schema import Error, Triple, Value
+from .. knowledge import hash
+from .. exceptions import RequestError
+
+from cassandra.cluster import Cluster
+from cassandra.auth import PlainTextAuthProvider
+from cassandra.query import BatchStatement
+from ssl import SSLContext, PROTOCOL_TLSv1_2
+
+import uuid
+import time
+import asyncio
+
+class LibraryTableStore:
+
+    def __init__(
+            self,
+            cassandra_host, cassandra_user, cassandra_password, keyspace,
+    ):
+        """Connect to Cassandra and get the library store ready for use.
+
+        ``cassandra_host`` is passed to ``Cluster`` unchanged.  When both
+        ``cassandra_user`` and ``cassandra_password`` are truthy the
+        connection uses plain-text authentication together with a
+        TLS 1.2 context; otherwise neither is configured.  Once
+        connected, the schema is ensured and the statements prepared.
+        """
+
+        self.keyspace = keyspace
+
+        print("Connecting to Cassandra...", flush=True)
+
+        # Credentials and TLS are switched on together or not at all
+        cluster_options = {}
+
+        if cassandra_user and cassandra_password:
+            cluster_options["ssl_context"] = SSLContext(PROTOCOL_TLSv1_2)
+            cluster_options["auth_provider"] = PlainTextAuthProvider(
+                username=cassandra_user, password=cassandra_password
+            )
+
+        self.cluster = Cluster(cassandra_host, **cluster_options)
+        self.cassandra = self.cluster.connect()
+
+        print("Connected.", flush=True)
+
+        self.ensure_cassandra_schema()
+        self.prepare_statements()
+
+    def ensure_cassandra_schema(self):
+        """Create the keyspace, tables and index used by the library store.
+
+        The keyspace, both tables and the index are created with
+        ``IF NOT EXISTS``, so running this on every start-up is harmless.
+        The session is switched to the keyspace before the tables are
+        created.
+        """
+
+        print("Ensure Cassandra schema...", flush=True)
+
+        print("Keyspace...", flush=True)
+
+        # FIXME: Replication factor should be configurable
+        # The keyspace name is interpolated directly into the statement,
+        # so it must come from trusted configuration.
+        self.cassandra.execute(f"""
+            create keyspace if not exists {self.keyspace}
+                with replication = {{
+                   'class' : 'SimpleStrategy',
+                   'replication_factor' : 1
+                }};
+        """)
+
+        self.cassandra.set_keyspace(self.keyspace)
+
+        print("document table...", flush=True)
+
+        # metadata holds one tuple per triple, laid out as
+        # (s, s_is_uri, p, p_is_uri, o, o_is_uri)
+        self.cassandra.execute("""
+            CREATE TABLE IF NOT EXISTS document (
+                id text,
+                user text,
+                time timestamp,
+                kind text,
+                title text,
+                comments text,
+                metadata list<tuple<
+                    text, boolean, text, boolean, text, boolean
+                >>,
+                tags list<text>,
+                object_id uuid,
+                PRIMARY KEY (user, id)
+            );
+        """)
+
+        print("object index...", flush=True)
+
+        self.cassandra.execute("""
+            CREATE INDEX IF NOT EXISTS document_object
+            ON document (object_id)
+        """)
+
+        print("processing table...", flush=True)
+
+        self.cassandra.execute("""
+            CREATE TABLE IF NOT EXISTS processing (
+                id text,
+                document_id text,
+                time timestamp,
+                flow text,
+                user text,
+                collection text,
+                tags list<text>,
+                PRIMARY KEY (user, id)
+            );
+        """)
+
+        print("Cassandra schema OK.", flush=True)
+
+    def prepare_statements(self):
+        """Prepare every CQL statement used by the library store.
+
+        Each prepared statement is kept on the instance under a
+        ``*_stmt`` attribute for the query methods to execute.
+        """
+
+        prepare = self.cassandra.prepare
+
+        # --- document table ---
+
+        self.insert_document_stmt = prepare("""
+            INSERT INTO document
+            (
+                id, user, time,
+                kind, title, comments,
+                metadata, tags, object_id
+            )
+            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
+        """)
+
+        # kind and object_id are not touched by an update
+        self.update_document_stmt = prepare("""
+            UPDATE document
+            SET time = ?, title = ?, comments = ?,
+                metadata = ?, tags = ?
+            WHERE user = ? AND id = ?
+        """)
+
+        self.get_document_stmt = prepare("""
+            SELECT time, kind, title, comments, metadata, tags, object_id
+            FROM document
+            WHERE user = ? AND id = ?
+        """)
+
+        self.delete_document_stmt = prepare("""
+            DELETE FROM document
+            WHERE user = ? AND id = ?
+        """)
+
+        self.test_document_exists_stmt = prepare("""
+            SELECT id
+            FROM document
+            WHERE user = ? AND id = ?
+            LIMIT 1
+        """)
+
+        self.list_document_stmt = prepare("""
+            SELECT
+                id, time, kind, title, comments, metadata, tags, object_id
+            FROM document
+            WHERE user = ?
+        """)
+
+        self.list_document_by_tag_stmt = prepare("""
+            SELECT
+                id, time, kind, title, comments, metadata, tags, object_id
+            FROM document
+            WHERE user = ? AND tags CONTAINS ?
+            ALLOW FILTERING
+        """)
+
+        # --- processing table ---
+
+        self.insert_processing_stmt = prepare("""
+            INSERT INTO processing
+            (
+                id, document_id, time,
+                flow, user, collection,
+                tags
+            )
+            VALUES (?, ?, ?, ?, ?, ?, ?)
+        """)
+
+        self.delete_processing_stmt = prepare("""
+            DELETE FROM processing
+            WHERE user = ? AND id = ?
+        """)
+
+        self.test_processing_exists_stmt = prepare("""
+            SELECT id
+            FROM processing
+            WHERE user = ? AND id = ?
+            LIMIT 1
+        """)
+
+        self.list_processing_stmt = prepare("""
+            SELECT
+                id, document_id, time, flow, collection, tags
+            FROM processing
+            WHERE user = ?
+        """)
+
+    async def document_exists(self, user, id):
+        """Report whether a document row exists for (``user``, ``id``)."""
+
+        rows = self.cassandra.execute(
+            self.test_document_exists_stmt,
+            (user, id)
+        )
+
+        # The result is a cursor with no usable length, so probe for a
+        # first row instead of counting
+        return any(True for _ in rows)
+
+    async def add_document(self, document, object_id):
+        """Insert a document row that refers to ``object_id``.
+
+        ``document.time`` is multiplied by 1000 before being written
+        (presumably seconds converted to milliseconds; confirm against
+        the caller).  Any exception from the driver is logged by type
+        and re-raised; no retry is attempted.
+        """
+
+        print("Adding document", document.id, object_id)
+
+        # Flatten each metadata triple to
+        # (s, s_is_uri, p, p_is_uri, o, o_is_uri)
+        metadata = [
+            (
+                v.s.value, v.s.is_uri, v.p.value, v.p.is_uri,
+                v.o.value, v.o.is_uri
+            )
+            for v in document.metadata
+        ]
+
+        try:
+            self.cassandra.execute(
+                self.insert_document_stmt,
+                (
+                    document.id, document.user, int(document.time * 1000),
+                    document.kind, document.title, document.comments,
+                    metadata, document.tags, object_id
+                )
+            )
+        except Exception as e:
+            # Report the failure type, then let the caller handle it
+            print("Exception:", type(e))
+            raise
+
+        print("Add complete", flush=True)
+
+    async def update_document(self, document):
+        """Overwrite the editable fields of an existing document row.
+
+        Time, title, comments, metadata and tags are written for the row
+        identified by (``document.user``, ``document.id``).  Any
+        exception from the driver is logged by type and re-raised; no
+        retry is attempted.
+        """
+
+        print("Updating document", document.id)
+
+        # Flatten each metadata triple to
+        # (s, s_is_uri, p, p_is_uri, o, o_is_uri)
+        metadata = [
+            (
+                v.s.value, v.s.is_uri, v.p.value, v.p.is_uri,
+                v.o.value, v.o.is_uri
+            )
+            for v in document.metadata
+        ]
+
+        try:
+            self.cassandra.execute(
+                self.update_document_stmt,
+                (
+                    int(document.time * 1000), document.title,
+                    document.comments, metadata, document.tags,
+                    document.user, document.id
+                )
+            )
+        except Exception as e:
+            # Report the failure type, then let the caller handle it
+            print("Exception:", type(e))
+            raise
+
+        print("Update complete", flush=True)
+
+    async def remove_document(self, user, document_id):
+        """Delete the document row for (``user``, ``document_id``).
+
+        Any exception from the driver is logged by type and re-raised;
+        no retry is attempted.
+        """
+
+        print("Removing document", document_id)
+
+        try:
+            self.cassandra.execute(
+                self.delete_document_stmt,
+                (user, document_id)
+            )
+        except Exception as e:
+            # Report the failure type, then let the caller handle it
+            print("Exception:", type(e))
+            raise
+
+        print("Delete complete", flush=True)
+
+    async def list_documents(self, user):
+        """Return a ``DocumentMetadata`` for every document owned by ``user``.
+
+        Any exception from the driver is logged by type and re-raised;
+        no retry is attempted.
+        """
+
+        import calendar
+
+        print("List documents...")
+
+        try:
+            resp = self.cassandra.execute(
+                self.list_document_stmt,
+                (user,)
+            )
+        except Exception as e:
+            # Report the failure type, then let the caller handle it
+            print("Exception:", type(e))
+            raise
+
+        # Row columns: id, time, kind, title, comments, metadata, tags,
+        # object_id
+        lst = [
+            DocumentMetadata(
+                id = row[0],
+                user = user,
+                # The driver hands back naive datetimes expressed in UTC,
+                # so convert with timegm rather than the local-time mktime
+                time = calendar.timegm(row[1].timetuple()),
+                kind = row[2],
+                title = row[3],
+                comments = row[4],
+                metadata = [
+                    Triple(
+                        s=Value(value=m[0], is_uri=m[1]),
+                        p=Value(value=m[2], is_uri=m[3]),
+                        o=Value(value=m[4], is_uri=m[5])
+                    )
+                    # A missing (null) collection is treated as empty
+                    for m in row[5] or []
+                ],
+                tags = row[6] if row[6] else [],
+                object_id = row[7],
+            )
+            for row in resp
+        ]
+
+        print("Done")
+
+        return lst
+
+    async def get_document(self, user, id):
+        """Return the ``DocumentMetadata`` for (``user``, ``id``).
+
+        Raises RuntimeError when no such row exists.  Any exception from
+        the driver is logged by type and re-raised; no retry is
+        attempted.
+        """
+
+        import calendar
+
+        print("Get document")
+
+        try:
+            resp = self.cassandra.execute(
+                self.get_document_stmt,
+                (user, id)
+            )
+        except Exception as e:
+            # Report the failure type, then let the caller handle it
+            print("Exception:", type(e))
+            raise
+
+        # Row columns: time, kind, title, comments, metadata, tags,
+        # object_id.  Only the first row is used.
+        for row in resp:
+            doc = DocumentMetadata(
+                id = id,
+                user = user,
+                # The driver hands back naive datetimes expressed in UTC,
+                # so convert with timegm rather than the local-time mktime
+                time = calendar.timegm(row[0].timetuple()),
+                kind = row[1],
+                title = row[2],
+                comments = row[3],
+                metadata = [
+                    Triple(
+                        s=Value(value=m[0], is_uri=m[1]),
+                        p=Value(value=m[2], is_uri=m[3]),
+                        o=Value(value=m[4], is_uri=m[5])
+                    )
+                    # A missing (null) collection is treated as empty
+                    for m in row[4] or []
+                ],
+                tags = row[5] if row[5] else [],
+                object_id = row[6],
+            )
+
+            print("Done")
+            return doc
+
+        raise RuntimeError("No such document row?")
+
+    async def get_document_object_id(self, user, id):
+        """Return the ``object_id`` of the document (``user``, ``id``).
+
+        Raises RuntimeError when no such row exists.  Any exception from
+        the driver is logged by type and re-raised; no retry is
+        attempted.
+        """
+
+        print("Get document obj ID")
+
+        try:
+            resp = self.cassandra.execute(
+                self.get_document_stmt,
+                (user, id)
+            )
+        except Exception as e:
+            # Report the failure type, then let the caller handle it
+            print("Exception:", type(e))
+            raise
+
+        # object_id is the seventh column of the document query
+        for row in resp:
+            print("Done")
+            return row[6]
+
+        raise RuntimeError("No such document row?")
+
+    async def processing_exists(self, user, id):
+        """Report whether a processing row exists for (``user``, ``id``)."""
+
+        rows = self.cassandra.execute(
+            self.test_processing_exists_stmt,
+            (user, id)
+        )
+
+        # The result is a cursor with no usable length, so probe for a
+        # first row instead of counting
+        return any(True for _ in rows)
+
+    async def add_processing(self, processing):
+        """Insert a processing row built from ``processing``.
+
+        ``processing.time`` is multiplied by 1000 before being written
+        (presumably seconds converted to milliseconds; confirm against
+        the caller).  Any exception from the driver is logged by type
+        and re-raised; no retry is attempted.
+        """
+
+        print("Adding processing", processing.id)
+
+        try:
+            self.cassandra.execute(
+                self.insert_processing_stmt,
+                (
+                    processing.id, processing.document_id,
+                    int(processing.time * 1000), processing.flow,
+                    processing.user, processing.collection,
+                    processing.tags
+                )
+            )
+        except Exception as e:
+            # Report the failure type, then let the caller handle it
+            print("Exception:", type(e))
+            raise
+
+        print("Add complete", flush=True)
+
+    async def remove_processing(self, user, processing_id):
+        """Delete the processing row for (``user``, ``processing_id``).
+
+        Any exception from the driver is logged by type and re-raised;
+        no retry is attempted.
+        """
+
+        print("Removing processing", processing_id)
+
+        try:
+            self.cassandra.execute(
+                self.delete_processing_stmt,
+                (user, processing_id)
+            )
+        except Exception as e:
+            # Report the failure type, then let the caller handle it
+            print("Exception:", type(e))
+            raise
+
+        print("Delete complete", flush=True)
+
+    async def list_processing(self, user):
+        """Return a ``ProcessingMetadata`` for every processing row of ``user``.
+
+        Any exception from the driver is logged by type and re-raised;
+        no retry is attempted.
+        """
+
+        import calendar
+
+        print("List processing objects")
+
+        try:
+            resp = self.cassandra.execute(
+                self.list_processing_stmt,
+                (user,)
+            )
+        except Exception as e:
+            # Report the failure type, then let the caller handle it
+            print("Exception:", type(e))
+            raise
+
+        # Row columns: id, document_id, time, flow, collection, tags
+        lst = [
+            ProcessingMetadata(
+                id = row[0],
+                document_id = row[1],
+                # The driver hands back naive datetimes expressed in UTC,
+                # so convert with timegm rather than the local-time mktime
+                time = calendar.timegm(row[2].timetuple()),
+                flow = row[3],
+                user = user,
+                collection = row[4],
+                tags = row[5] if row[5] else [],
+            )
+            for row in resp
+        ]
+
+        print("Done")
+
+        return lst
+
diff --git a/trustgraph-ocr/setup.py b/trustgraph-ocr/setup.py
index 43e15061..5e2531fe 100644
--- a/trustgraph-ocr/setup.py
+++ b/trustgraph-ocr/setup.py
@@ -34,7 +34,7 @@ setuptools.setup(
python_requires='>=3.8',
download_url = "https://github.com/trustgraph-ai/trustgraph/archive/refs/tags/v" + version + ".tar.gz",
install_requires=[
- "trustgraph-base>=0.21,<0.22",
+ "trustgraph-base>=0.23,<0.24",
"pulsar-client",
"prometheus-client",
"boto3",
diff --git a/trustgraph-ocr/trustgraph/decoding/ocr/pdf_decoder.py b/trustgraph-ocr/trustgraph/decoding/ocr/pdf_decoder.py
index f8926589..5fa436b8 100755
--- a/trustgraph-ocr/trustgraph/decoding/ocr/pdf_decoder.py
+++ b/trustgraph-ocr/trustgraph/decoding/ocr/pdf_decoder.py
@@ -14,7 +14,7 @@ from ... schema import document_ingest_queue, text_ingest_queue
from ... log_level import LogLevel
from ... base import ConsumerProducer
-module = ".".join(__name__.split(".")[1:-1])
+module = "ocr"
default_input_queue = document_ingest_queue
default_output_queue = text_ingest_queue
diff --git a/trustgraph-vertexai/setup.py b/trustgraph-vertexai/setup.py
index 1258fea9..5381c043 100644
--- a/trustgraph-vertexai/setup.py
+++ b/trustgraph-vertexai/setup.py
@@ -34,7 +34,7 @@ setuptools.setup(
python_requires='>=3.8',
download_url = "https://github.com/trustgraph-ai/trustgraph/archive/refs/tags/v" + version + ".tar.gz",
install_requires=[
- "trustgraph-base>=0.21,<0.22",
+ "trustgraph-base>=0.23,<0.24",
"pulsar-client",
"google-cloud-aiplatform",
"prometheus-client",
diff --git a/trustgraph-vertexai/trustgraph/model/text_completion/vertexai/llm.py b/trustgraph-vertexai/trustgraph/model/text_completion/vertexai/llm.py
index 4d38c8c0..854be961 100755
--- a/trustgraph-vertexai/trustgraph/model/text_completion/vertexai/llm.py
+++ b/trustgraph-vertexai/trustgraph/model/text_completion/vertexai/llm.py
@@ -4,50 +4,30 @@ Simple LLM service, performs text prompt completion using VertexAI on
Google Cloud. Input is prompt, output is response.
"""
-import vertexai
-import time
-from prometheus_client import Histogram
-import os
-
from google.oauth2 import service_account
import google
+import vertexai
from vertexai.preview.generative_models import (
- Content,
- FunctionDeclaration,
- GenerativeModel,
- GenerationConfig,
- HarmCategory,
- HarmBlockThreshold,
- Part,
- Tool,
+ Content, FunctionDeclaration, GenerativeModel, GenerationConfig,
+ HarmCategory, HarmBlockThreshold, Part, Tool,
)
-from .... schema import TextCompletionRequest, TextCompletionResponse, Error
-from .... schema import text_completion_request_queue
-from .... schema import text_completion_response_queue
-from .... log_level import LogLevel
-from .... base import ConsumerProducer
from .... exceptions import TooManyRequests
+from .... base import LlmService, LlmResult
-module = ".".join(__name__.split(".")[1:-1])
+default_ident = "text-completion"
-default_input_queue = text_completion_request_queue
-default_output_queue = text_completion_response_queue
-default_subscriber = module
default_model = 'gemini-1.0-pro-001'
default_region = 'us-central1'
default_temperature = 0.0
default_max_output = 8192
default_private_key = "private.json"
-class Processor(ConsumerProducer):
+class Processor(LlmService):
def __init__(self, **params):
- input_queue = params.get("input_queue", default_input_queue)
- output_queue = params.get("output_queue", default_output_queue)
- subscriber = params.get("subscriber", default_subscriber)
region = params.get("region", default_region)
model = params.get("model", default_model)
private_key = params.get("private_key", default_private_key)
@@ -57,28 +37,7 @@ class Processor(ConsumerProducer):
if private_key is None:
raise RuntimeError("Private key file not specified")
- super(Processor, self).__init__(
- **params | {
- "input_queue": input_queue,
- "output_queue": output_queue,
- "subscriber": subscriber,
- "input_schema": TextCompletionRequest,
- "output_schema": TextCompletionResponse,
- }
- )
-
- if not hasattr(__class__, "text_completion_metric"):
- __class__.text_completion_metric = Histogram(
- 'text_completion_duration',
- 'Text completion duration (seconds)',
- buckets=[
- 0.25, 0.5, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0,
- 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
- 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0,
- 30.0, 35.0, 40.0, 45.0, 50.0, 60.0, 80.0, 100.0,
- 120.0
- ]
- )
+ super(Processor, self).__init__(**params)
self.parameters = {
"temperature": temperature,
@@ -110,7 +69,11 @@ class Processor(ConsumerProducer):
print("Initialise VertexAI...", flush=True)
if private_key:
- credentials = service_account.Credentials.from_service_account_file(private_key)
+ credentials = (
+ service_account.Credentials.from_service_account_file(
+ private_key
+ )
+ )
else:
credentials = None
@@ -131,50 +94,30 @@ class Processor(ConsumerProducer):
print("Initialisation complete", flush=True)
- async def handle(self, msg):
+ async def generate_content(self, system, prompt):
try:
- v = msg.value()
+ prompt = system + "\n\n" + prompt
- # Sender-produced ID
+ response = self.llm.generate_content(
+ prompt, generation_config=self.generation_config,
+ safety_settings=self.safety_settings
+ )
- id = msg.properties()["id"]
+ resp = LlmResult(
+ text = response.text,
+ in_token = response.usage_metadata.prompt_token_count,
+ out_token = response.usage_metadata.candidates_token_count,
+ model = self.model
+ )
- print(f"Handling prompt {id}...", flush=True)
-
- prompt = v.system + "\n\n" + v.prompt
-
- with __class__.text_completion_metric.time():
-
- response = self.llm.generate_content(
- prompt, generation_config=self.generation_config,
- safety_settings=self.safety_settings
- )
-
- resp = response.text
- inputtokens = int(response.usage_metadata.prompt_token_count)
- outputtokens = int(response.usage_metadata.candidates_token_count)
- print(resp, flush=True)
- print(f"Input Tokens: {inputtokens}", flush=True)
- print(f"Output Tokens: {outputtokens}", flush=True)
+ print(f"Input Tokens: {resp.in_token}", flush=True)
+ print(f"Output Tokens: {resp.out_token}", flush=True)
print("Send response...", flush=True)
- r = TextCompletionResponse(
- error=None,
- response=resp,
- in_token=inputtokens,
- out_token=outputtokens,
- model=self.model
- )
-
- await self.send(r, properties={"id": id})
-
- print("Done.", flush=True)
-
- # Acknowledge successful processing of the message
- self.consumer.acknowledge(msg)
+ return resp
except google.api_core.exceptions.ResourceExhausted as e:
@@ -186,40 +129,19 @@ class Processor(ConsumerProducer):
except Exception as e:
# Apart from rate limits, treat all exceptions as unrecoverable
-
print(f"Exception: {e}")
-
- print("Send error response...", flush=True)
-
- r = TextCompletionResponse(
- error=Error(
- type = "llm-error",
- message = str(e),
- ),
- response=None,
- in_token=None,
- out_token=None,
- model=None,
- )
-
- await self.send(r, properties={"id": id})
-
- self.consumer.acknowledge(msg)
+ raise e
@staticmethod
def add_args(parser):
- ConsumerProducer.add_args(
- parser, default_input_queue, default_subscriber,
- default_output_queue,
- )
+ LlmService.add_args(parser)
parser.add_argument(
'-m', '--model',
default=default_model,
help=f'LLM model (default: {default_model})'
)
- # Also: text-bison-32k
parser.add_argument(
'-k', '--private-key',
@@ -247,6 +169,5 @@ class Processor(ConsumerProducer):
)
def run():
-
- Processor.launch(module, __doc__)
+ Processor.launch(default_ident, __doc__)
diff --git a/trustgraph/setup.py b/trustgraph/setup.py
index d7185e66..bf846295 100644
--- a/trustgraph/setup.py
+++ b/trustgraph/setup.py
@@ -34,12 +34,12 @@ setuptools.setup(
python_requires='>=3.8',
download_url = "https://github.com/trustgraph-ai/trustgraph/archive/refs/tags/v" + version + ".tar.gz",
install_requires=[
- "trustgraph-base>=0.21,<0.22",
- "trustgraph-bedrock>=0.21,<0.22",
- "trustgraph-cli>=0.21,<0.22",
- "trustgraph-embeddings-hf>=0.21,<0.22",
- "trustgraph-flow>=0.21,<0.22",
- "trustgraph-vertexai>=0.21,<0.22",
+ "trustgraph-base>=0.23,<0.24",
+ "trustgraph-bedrock>=0.23,<0.24",
+ "trustgraph-cli>=0.23,<0.24",
+ "trustgraph-embeddings-hf>=0.23,<0.24",
+ "trustgraph-flow>=0.23,<0.24",
+ "trustgraph-vertexai>=0.23,<0.24",
],
scripts=[
]