Private/Merge-ChunkSummaries.ps1
|
# Copyright (c) 2026 Jeffrey Snover. All rights reserved. # Licensed under the MIT License. See LICENSE file in the project root. # Merges multiple chunk-level POV summaries into a single consolidated summary. # Deduplicates key_points by taxonomy_node_id + point similarity, # unions factual_claims and unmapped_concepts, and removes exact duplicates. function Merge-ChunkSummaries { [CmdletBinding()] param( [Parameter(Mandatory)][object[]]$ChunkResults ) Set-StrictMode -Version Latest $ErrorActionPreference = 'Stop' # ── Merge key_points per camp ──────────────────────────────────────────── $Camps = @('accelerationist', 'safetyist', 'skeptic') $MergedPovSummaries = [ordered]@{} foreach ($Camp in $Camps) { $AllPoints = [System.Collections.Generic.List[object]]::new() $SeenKeys = [System.Collections.Generic.HashSet[string]]::new() foreach ($Chunk in $ChunkResults) { $CampData = $Chunk.pov_summaries.$Camp if (-not $CampData -or -not $CampData.key_points) { continue } foreach ($kp in $CampData.key_points) { # Build a dedup key: taxonomy_node_id + first 80 chars of point $PointPrefix = if ($kp.point.Length -gt 80) { $kp.point.Substring(0, 80) } else { $kp.point } $DedupKey = "$($kp.taxonomy_node_id)|$($PointPrefix.ToLowerInvariant().Trim())" if ($SeenKeys.Add($DedupKey)) { $AllPoints.Add($kp) } } } $MergedPovSummaries[$Camp] = [ordered]@{ key_points = @($AllPoints) } } # ── Merge factual_claims ───────────────────────────────────────────────── $AllClaims = [System.Collections.Generic.List[object]]::new() $SeenClaimLabels = [System.Collections.Generic.HashSet[string]]::new() foreach ($Chunk in $ChunkResults) { if (-not $Chunk.factual_claims) { continue } foreach ($Claim in $Chunk.factual_claims) { # Dedup on claim_label (lowercased) $ClaimKey = if ($Claim.claim_label) { $Claim.claim_label.ToLowerInvariant().Trim() } else { # Fallback: first 60 chars of claim text $ClaimText = if ($Claim.claim.Length -gt 60) { $Claim.claim.Substring(0, 60) } else { $Claim.claim } $ClaimText.ToLowerInvariant().Trim() } if ($SeenClaimLabels.Add($ClaimKey)) { $AllClaims.Add($Claim) } } } # ── Merge unmapped_concepts ────────────────────────────────────────────── $AllUnmapped = [System.Collections.Generic.List[object]]::new() $SeenLabels = [System.Collections.Generic.HashSet[string]]::new() foreach ($Chunk in $ChunkResults) { if (-not $Chunk.unmapped_concepts) { continue } foreach ($Concept in $Chunk.unmapped_concepts) { $HasLabel = $Concept.PSObject.Properties['suggested_label'] -and $Concept.suggested_label $LabelKey = if ($HasLabel) { $Concept.suggested_label.ToLowerInvariant().Trim() } else { "unknown-$($AllUnmapped.Count)" } if ($SeenLabels.Add($LabelKey)) { $AllUnmapped.Add($Concept) } } } # ── Return merged structure ────────────────────────────────────────────── return [ordered]@{ pov_summaries = $MergedPovSummaries factual_claims = @($AllClaims) unmapped_concepts = @($AllUnmapped) } } |