diff --git a/templates/stores/neo4j.jsonnet b/templates/stores/neo4j.jsonnet index 55cccc5f..3a8bb783 100644 --- a/templates/stores/neo4j.jsonnet +++ b/templates/stores/neo4j.jsonnet @@ -14,12 +14,14 @@ local images = import "values/images.jsonnet"; .with_image(images.neo4j) .with_environment({ NEO4J_AUTH: "neo4j/password", + NEO4J_server_memory_pagecache_size: "512m", + NEO4J_server_memory_heap_max__size: "512m", // NEO4J_server_bolt_listen__address: "0.0.0.0:7687", // NEO4J_server_default__listen__address: "0.0.0.0", // NEO4J_server_http_listen__address: "0.0.0.0:7474", }) - .with_limits("1.0", "768M") - .with_reservations("0.5", "768M") + .with_limits("1.0", "1536M") + .with_reservations("0.5", "1536M") .with_port(7474, 7474, "api") .with_port(7687, 7687, "api2") .with_volume_mount(vol, "/data"); diff --git a/templates/values/images.jsonnet b/templates/values/images.jsonnet index 9da4e89f..b0416eb3 100644 --- a/templates/values/images.jsonnet +++ b/templates/values/images.jsonnet @@ -1,7 +1,7 @@ local version = import "version.jsonnet"; { cassandra: "docker.io/cassandra:4.1.6", - neo4j: "docker.io/neo4j:5.22.0-community-bullseye", + neo4j: "docker.io/neo4j:5.26.0-community-bullseye", pulsar: "docker.io/apachepulsar/pulsar:3.3.1", pulsar_manager: "docker.io/apachepulsar/pulsar-manager:v0.4.0", etcd: "quay.io/coreos/etcd:v3.5.15", diff --git a/trustgraph-flow/trustgraph/storage/triples/memgraph/write.py b/trustgraph-flow/trustgraph/storage/triples/memgraph/write.py index f106170a..7295e691 100755 --- a/trustgraph-flow/trustgraph/storage/triples/memgraph/write.py +++ b/trustgraph-flow/trustgraph/storage/triples/memgraph/write.py @@ -55,6 +55,14 @@ class Processor(Consumer): def create_indexes(self, session): + # Race condition, index creation failure is ignored. Right thing + # to do if the index already exists. 
def create_indexes(self, session):
    """Create the graph indexes on a best-effort basis.

    Three indexes are requested: ``Node_uri`` on ``(:Node).uri``,
    ``Literal_value`` on ``(:Literal).value`` and ``Rel_uri`` on
    ``[:Rel].uri``.

    Each statement uses ``IF NOT EXISTS`` so that re-running this on
    every start-up is a no-op once the indexes are in place, instead of
    raising an "already exists" error that has to be swallowed.

    Args:
        session: An open database session exposing ``run(query)`` whose
            return value exposes ``consume()``.

    Returns:
        None.  Failures are printed and ignored, never raised.

    NOTE(review): a failure caused by the store not being up yet is
    still ignored here, as before.  The indexes are then only created
    on a later restart of this process.
    """

    statements = (
        "CREATE INDEX Node_uri IF NOT EXISTS FOR (n:Node) ON (n.uri)",
        "CREATE INDEX Literal_value IF NOT EXISTS "
        "FOR (n:Literal) ON (n.value)",
        "CREATE INDEX Rel_uri IF NOT EXISTS FOR ()-[r:Rel]-() ON (r.uri)",
    )

    print("Create indexes...", flush=True)

    for statement in statements:
        try:
            # Consume the result straight away so that any server-side
            # failure is reported inside this try block rather than on
            # a later call that uses the same session.
            session.run(statement).consume()
        except Exception as e:
            # Deliberately broad: index creation must never stop the
            # processor from starting.
            print(e, flush=True)
            print("Index create failure ignored", flush=True)

    print("Index creation done", flush=True)