diff --git a/src/iai_mcp/qdrant_store.py b/src/iai_mcp/qdrant_store.py index 4900a4d..99e674c 100644 --- a/src/iai_mcp/qdrant_store.py +++ b/src/iai_mcp/qdrant_store.py @@ -934,13 +934,15 @@ class QdrantStore: if not coalesced: return {} - # Fetch existing edges from metadata collection + # Fetch existing edges from metadata collection. + # existing_map: edge_key -> (point_id, weight) so we can update + # in-place by reusing the existing point ID (avoids duplicates). all_edges = self._scroll_all(METADATA_TABLE, table_filter=EDGES_TABLE) - existing_map: dict[tuple[str, str, str], float] = {} + existing_map: dict[tuple[str, str, str], tuple[str, float]] = {} for point in all_edges: p = point.payload edge_key = (p.get("src", ""), p.get("dst", ""), p.get("edge_type", "")) - existing_map[edge_key] = float(p.get("weight", 0.0)) + existing_map[edge_key] = (str(point.id), float(p.get("weight", 0.0))) now = datetime.now(timezone.utc).isoformat() points_to_upsert: list[PointStruct] = [] @@ -949,13 +951,15 @@ class QdrantStore: for (src_str, dst_str), accum_delta in coalesced.items(): edge_key = (src_str, dst_str, edge_type) if edge_key in existing_map: - nw = existing_map[edge_key] + accum_delta + nw = existing_map[edge_key][1] + accum_delta + point_id = existing_map[edge_key][0] # reuse existing point ID else: nw = accum_delta + point_id = str(uuid4()) # new edge → new ID - # Create payload-only point (use UUID string for Qdrant compatibility) + # Create payload-only point (reuses existing ID to update in-place) points_to_upsert.append(PointStruct( - id=str(uuid4()), + id=point_id, vector={}, payload={ "table": EDGES_TABLE, @@ -992,17 +996,33 @@ class QdrantStore: return self.boost_edges([pair], delta=delta, edge_type=edge_type) def add_contradicts_edge(self, original: UUID, new_id: UUID) -> None: - """Add a contradicts edge in the metadata collection (table=edges).""" + """Add or update a contradicts edge in the metadata collection (table=edges). + + Reuses existing point ID if the edge already exists to avoid duplicates. + """ + src_str = str(original) + dst_str = str(new_id) + edge_key = (src_str, dst_str, "contradicts") + + # Check if edge already exists + all_edges = self._scroll_all(METADATA_TABLE, table_filter=EDGES_TABLE) + point_id = str(uuid4()) # default: new edge + for point in all_edges: + p = point.payload + if (p.get("src"), p.get("dst"), p.get("edge_type")) == edge_key: + point_id = str(point.id) + break + self._client.upsert( collection_name=METADATA_TABLE, points=[PointStruct( - id=str(uuid4()), + id=point_id, vector={}, payload={ "table": EDGES_TABLE, "group_id": self._group_id, - "src": str(original), - "dst": str(new_id), + "src": src_str, + "dst": dst_str, "edge_type": "contradicts", "weight": 1.0, "updated_at": datetime.now(timezone.utc).isoformat(), diff --git a/src/iai_mcp/sigma.py b/src/iai_mcp/sigma.py index df4f9ec..15ff064 100644 --- a/src/iai_mcp/sigma.py +++ b/src/iai_mcp/sigma.py @@ -151,11 +151,17 @@ def fast_sigma( def compute_sigma(graph: "nx.Graph", *, seed: int = 42) -> Optional[float]: """D-SIGMA-01: sigma at N>=SIGMA_N_FLOOR; otherwise None. - Returns None for graphs with fewer than SIGMA_N_FLOOR nodes -- below - that threshold, the random-graph baselines are too noisy to interpret - (Humphries-Gurney 2008). + Returns None for graphs whose largest connected component has fewer + than SIGMA_N_FLOOR nodes -- below that threshold, the random-graph + baselines are too noisy to interpret (Humphries-Gurney 2008). + + This checks the largest CC rather than total node count because a + graph with many isolated nodes (e.g. 311 nodes, 310 components) would + otherwise pass the floor check while the actual connected subgraph is + too small for meaningful sigma computation. """ - if graph.number_of_nodes() < SIGMA_N_FLOOR: + g_cc = _largest_cc(graph) + if g_cc.number_of_nodes() < SIGMA_N_FLOOR: return None sigma_val, *_ = fast_sigma(graph, seed=seed) if isinstance(sigma_val, float) and math.isnan(sigma_val):