1818 MetricSnapshot ,
1919 TwinScenario ,
2020)
21- from contextmine_core .twin import (
22- GraphProjection ,
23- get_full_scenario_graph ,
24- get_scenario_provenance_node_ids ,
25- )
21+ from contextmine_core .twin import get_full_scenario_graph , get_scenario_provenance_node_ids
2622from contextmine_core .twin .grouping import canonical_file_path_from_meta , derive_arch_group
2723from sqlalchemy import select
2824from sqlalchemy .ext .asyncio import AsyncSession
2925
26+ from .recovery import recover_architecture_model
27+ from .recovery_docs import load_recovery_docs
3028from .schemas import ArchitectureFact , ArchitectureFactsBundle , EvidenceRef , PortAdapterFact
3129
3230DETERMINISTIC_CONFIDENCE = 0.9
6260
6361_derive_arch_group = derive_arch_group
6462_canonical_file_path = canonical_file_path_from_meta
63+ _compat_get_full_scenario_graph = get_full_scenario_graph
6564
6665
6766def _evidence_from_symbol_meta (node : KnowledgeNode ) -> tuple [EvidenceRef , ...]:
@@ -248,6 +247,209 @@ def _dedupe_ports(facts: list[PortAdapterFact]) -> list[PortAdapterFact]:
248247 return sorted (by_id .values (), key = lambda row : row .fact_id )
249248
250249
def _kg_nodes_to_recovery_inputs(
    nodes: list[KnowledgeNode],
) -> tuple[list[dict[str, Any]], dict[UUID, str]]:
    """Flatten knowledge-graph nodes into plain dicts for architecture recovery.

    Every node is identified by a string subject reference: its ``natural_key``
    when that value is truthy, otherwise its ``id`` rendered with ``str``.

    Returns:
        A pair ``(recovery_nodes, subject_ref_by_id)``. ``recovery_nodes`` holds
        one dict per input node (keys ``id``, ``kind``, ``name``, ``natural_key``
        and ``meta``), in input order. ``subject_ref_by_id`` maps each node's
        ``id`` to the subject reference used as that dict's ``"id"``.
    """
    refs = [str(item.natural_key or item.id) for item in nodes]
    ref_lookup: dict[UUID, str] = {item.id: ref for item, ref in zip(nodes, refs)}
    payloads: list[dict[str, Any]] = [
        {
            "id": ref,
            "kind": item.kind,
            "name": item.name,
            "natural_key": item.natural_key,
            # A missing/empty meta is normalised to an empty dict.
            "meta": item.meta or {},
        }
        for item, ref in zip(nodes, refs)
    ]
    return payloads, ref_lookup
268+
269+
def _kg_edges_to_recovery_inputs(
    edges: list[KnowledgeEdge],
    subject_ref_by_id: dict[UUID, str],
) -> list[dict[str, Any]]:
    """Translate knowledge-graph edges into plain dicts keyed by subject reference.

    Both endpoints of an edge are resolved through ``subject_ref_by_id``. An edge
    is dropped when either endpoint is absent from the mapping or maps to an
    empty reference. Surviving edges keep their input order.
    """
    resolve = subject_ref_by_id.get
    resolved = (
        (link, resolve(link.source_node_id), resolve(link.target_node_id)) for link in edges
    )
    return [
        {
            "source_node_id": origin,
            "target_node_id": destination,
            "kind": link.kind,
            # NOTE(review): edge metadata is always emitted empty here, unlike node
            # metadata -- confirm that recovery does not need it.
            "meta": {},
        }
        for link, origin, destination in resolved
        if origin and destination
    ]
289+
290+
def _recovered_entity_to_fact(entity: Any) -> ArchitectureFact:
    """Wrap a recovered entity in an ``ArchitectureFact``.

    The entity's ``kind`` becomes the fact type and, with underscores turned
    into spaces, is used in the title and description. The fact is marked as
    ``"deterministic"`` and carries the entity's own confidence and evidence.
    The entity's ``attributes`` are merged over an ``entity_id`` entry.
    """
    kind = entity.kind
    spaced_kind = kind.replace("_", " ")
    name = entity.name
    identifier = entity.entity_id
    return ArchitectureFact(
        fact_id=f"recovered_entity:{identifier}",
        fact_type=kind,
        title=f"{spaced_kind.title()} {name}",
        description=f"Recovered {spaced_kind} '{name}'",
        source="deterministic",
        confidence=float(entity.confidence),
        tags=("recovered", kind),
        attributes={"entity_id": identifier, **entity.attributes},
        evidence=entity.evidence,
    )
303+
304+
def _recovered_relationship_to_fact(relationship: Any) -> ArchitectureFact:
    """Wrap a recovered relationship in an ``ArchitectureFact``.

    The fact id and description are both derived from the source entity id,
    the relationship kind and the target entity id. The relationship's own
    ``attributes`` are merged over the three endpoint/kind entries.
    """
    origin = relationship.source_entity_id
    destination = relationship.target_entity_id
    kind = relationship.kind
    return ArchitectureFact(
        fact_id=f"recovered_relationship:{origin}:{kind}:{destination}",
        fact_type="recovered_relationship",
        title="Recovered relationship",
        description=f"{origin} {kind} {destination}",
        source="deterministic",
        confidence=float(relationship.confidence),
        tags=("recovered", "relationship", kind),
        attributes={
            "source_entity_id": origin,
            "target_entity_id": destination,
            "relationship_kind": kind,
            **relationship.attributes,
        },
        evidence=relationship.evidence,
    )
327+
328+
def _recovered_hypothesis_to_fact(hypothesis: Any) -> ArchitectureFact:
    """Wrap a recovery hypothesis in an ``ArchitectureFact``.

    The fact is keyed by the hypothesis' ``subject_ref``, uses its rationale as
    the description, is marked with source ``"hybrid"``, and records the
    candidate and selected entity ids (copied into lists) alongside the status.
    """
    subject = hypothesis.subject_ref
    status = hypothesis.status
    details = {
        "subject_ref": subject,
        "candidate_entity_ids": list(hypothesis.candidate_entity_ids),
        "selected_entity_ids": list(hypothesis.selected_entity_ids),
        "status": status,
    }
    return ArchitectureFact(
        fact_id=f"recovered_hypothesis:{subject}",
        fact_type="recovered_hypothesis",
        title=f"Recovered hypothesis for {subject}",
        description=hypothesis.rationale,
        source="hybrid",
        confidence=float(hypothesis.confidence),
        tags=("recovered", "hypothesis", status),
        attributes=details,
        evidence=hypothesis.evidence,
    )
346+
347+
def _recovered_decision_to_fact(decision: Any) -> ArchitectureFact:
    """Wrap a recovered architecture decision in an ``ArchitectureFact``."""
    status = decision.status
    return ArchitectureFact(
        fact_id=f"recovered_decision:{decision.title}",
        fact_type="architecture_decision",
        title=decision.title,
        description=decision.summary,
        source="deterministic",
        confidence=float(decision.confidence),
        tags=("recovered", "decision", status),
        attributes={
            "affected_entity_ids": list(decision.affected_entity_ids),
            "status": status,
        },
        evidence=decision.evidence,
    )


def _collect_recovered_architecture_facts(bundle: ArchitectureFactsBundle, model: Any) -> None:
    """Append facts for everything in the recovered model to ``bundle.facts``.

    Facts are appended in a fixed order: entities, relationships, hypotheses,
    then decisions. A model without a ``decisions`` attribute contributes no
    decision facts.
    """
    facts = bundle.facts
    facts.extend(_recovered_entity_to_fact(item) for item in model.entities)
    facts.extend(_recovered_relationship_to_fact(item) for item in model.relationships)
    facts.extend(_recovered_hypothesis_to_fact(item) for item in model.hypotheses)
    facts.extend(_recovered_decision_to_fact(item) for item in getattr(model, "decisions", ()))
372+
373+
def _entity_kind(entity_id: str) -> str:
    """Return the part of ``entity_id`` before its first ``":"``.

    An id without a colon is returned unchanged.
    """
    kind, _, _ = entity_id.partition(":")
    return kind
376+
377+
def _best_membership(memberships: list[Any], kind: str) -> Any | None:
    """Pick the strongest membership whose entity id has the given kind prefix.

    The winner has the highest confidence; ties are broken by the smallest
    ``entity_id``. Returns ``None`` when no membership matches ``kind``.
    """
    matching = (row for row in memberships if _entity_kind(row.entity_id) == kind)
    # min() returns the first of several equal-ranked rows, the same element a
    # stable ascending sort would place at index 0.
    return min(
        matching,
        key=lambda row: (-float(row.confidence), row.entity_id),
        default=None,
    )
385+
386+
def _enrich_ports_with_recovery(ports: list[PortAdapterFact], model: Any) -> list[PortAdapterFact]:
    """Refine port/adapter facts with memberships from the recovered model.

    For each port, memberships are looked up via ``model.memberships_for`` for
    the port's ``natural_key`` and ``source_natural_key`` attributes. Ports with
    no memberships are passed through unchanged. Otherwise a copy is produced
    in which:

    * ``container`` is the id suffix of the best ``container`` membership,
    * ``component`` is the entity name of the best ``component`` membership
      (falling back to the port's current component if the entity is unknown),
    * ``confidence`` is raised to the highest confidence among all gathered
      memberships when that exceeds the port's own,
    * ``attributes`` gains a sorted ``candidate_memberships`` list of entity ids.

    Returns:
        A new list with one entry per input port, in input order.
    """
    entity_name_by_id = {entity.entity_id: entity.name for entity in model.entities}
    enriched: list[PortAdapterFact] = []
    for port in ports:
        # dict.fromkeys drops a repeated reference while keeping first-seen order,
        # so a port whose two keys coincide is only looked up once.
        subject_refs = dict.fromkeys(
            str(port.attributes.get(key) or "").strip()
            for key in ("natural_key", "source_natural_key")
        )
        memberships: list[Any] = [
            membership
            for subject_ref in subject_refs
            if subject_ref
            for membership in model.memberships_for(subject_ref)
        ]

        if not memberships:
            enriched.append(port)
            continue

        container = port.container
        best_container = _best_membership(memberships, "container")
        if best_container is not None:
            # Guard against an id that is just the kind with no ":<name>" part;
            # indexing the split result would raise IndexError in that case.
            _, separator, container_name = best_container.entity_id.partition(":")
            if separator:
                container = container_name

        component = port.component
        best_component = _best_membership(memberships, "component")
        if best_component is not None:
            component = entity_name_by_id.get(best_component.entity_id, component)

        strongest = max(float(membership.confidence) for membership in memberships)
        enriched.append(
            replace(
                port,
                container=container,
                component=component,
                confidence=max(float(port.confidence), strongest),
                attributes={
                    **port.attributes,
                    "candidate_memberships": sorted(
                        {membership.entity_id for membership in memberships}
                    ),
                },
            )
        )
    return enriched
433+
434+
def _append_recovery_warning_counts(bundle: ArchitectureFactsBundle, model: Any) -> None:
    """Append ``name=count`` summaries of recovery problems to ``bundle.warnings``.

    Three counts are derived from the model: hypotheses whose status is
    ``"unresolved"``, warnings containing "rejected adjudication", and warnings
    containing "missing evidence packet" (both matched case-insensitively).
    Only non-zero counts are appended, always in that order.
    """
    unresolved = sum(1 for item in model.hypotheses if item.status == "unresolved")
    lowered = [text.lower() for text in model.warnings]
    tallies = (
        ("unresolved_hypotheses", unresolved),
        (
            "rejected_llm_adjudications",
            sum(1 for text in lowered if "rejected adjudication" in text),
        ),
        (
            "missing_evidence_packets",
            sum(1 for text in lowered if "missing evidence packet" in text),
        ),
    )
    for label, count in tallies:
        if count:
            bundle.warnings.append(f"{label}={count}")
451+
452+
251453def _collect_container_facts (bundle : ArchitectureFactsBundle , container_graph : dict ) -> None :
252454 """Add container facts from the architecture graph."""
253455 for node in container_graph ["nodes" ]:
@@ -386,26 +588,6 @@ async def build_architecture_facts(
386588 "ARCH_DOCS_LLM_ENRICH is enabled but no LLM provider is available; using deterministic fallback."
387589 )
388590
389- container_graph = await get_full_scenario_graph (
390- session = session ,
391- scenario_id = scenario_id ,
392- layer = None ,
393- projection = GraphProjection .ARCHITECTURE ,
394- entity_level = "container" ,
395- include_kinds = {"file" },
396- )
397- component_graph = await get_full_scenario_graph (
398- session = session ,
399- scenario_id = scenario_id ,
400- layer = None ,
401- projection = GraphProjection .ARCHITECTURE ,
402- entity_level = "component" ,
403- include_kinds = {"file" },
404- )
405-
406- _collect_container_facts (bundle , container_graph )
407- _collect_component_facts (bundle , component_graph )
408-
409591 await _collect_c4_view_facts (session , scenario_id , bundle )
410592
411593 metrics = (
@@ -506,6 +688,18 @@ async def build_architecture_facts(
506688 .scalars ()
507689 .all ()
508690 )
691+ recovery_nodes , recovery_subject_refs = _kg_nodes_to_recovery_inputs (kg_nodes )
692+ recovery_edges = _kg_edges_to_recovery_inputs (kg_edges , recovery_subject_refs )
693+ recovery_docs = await load_recovery_docs (session , kg_nodes )
694+ recovered_model = recover_architecture_model (
695+ recovery_nodes ,
696+ recovery_edges ,
697+ docs = recovery_docs ,
698+ llm_adjudicator = llm_provider if enable_llm_enrich else None ,
699+ )
700+ _collect_recovered_architecture_facts (bundle , recovered_model )
701+ _append_recovery_warning_counts (bundle , recovered_model )
702+
509703 outbound_edges = [
510704 edge
511705 for edge in kg_edges
@@ -531,6 +725,7 @@ async def build_architecture_facts(
531725 )
532726 ports = enriched
533727
728+ ports = _enrich_ports_with_recovery (ports , recovered_model )
534729 bundle .ports_adapters = _dedupe_ports (ports )
535730
536731 return bundle
0 commit comments