Private/Invoke-ScoreCandidates.ps1
|
<#
.SYNOPSIS
    Score and rank candidate documents against query tokens.
#>

function Invoke-ScoreCandidates {
    <#
    .SYNOPSIS
        Score index entries against query tokens and return ranked results.

    .DESCRIPTION
        Each entry's raw score is the sum of three signals:

          * Exact identifier match: +10 for every distinct query token that
            contains a '.' and occurs literally (case-insensitively) in the
            entry's Content.
          * Token overlap: Jaccard similarity between the distinct query tokens
            and the entry's Tokens, multiplied by 5.
          * Fuzzy title match: when Invoke-FuzzyScore (defined elsewhere) returns
            a value above 0.6 for the joined query versus the lower-cased Title,
            that value multiplied by 2 is added.

        Entries with a raw score of 0 are dropped. Confidence is the raw score
        times 10, rounded and capped at 100. Results are sorted by raw score,
        highest first, and truncated to -Top.

        Query tokens are de-duplicated case-insensitively before scoring so a
        repeated token cannot count twice.

    .PARAMETER QueryTokens
        Array of normalized query tokens. An empty array yields a warning and
        no results.

    .PARAMETER IndexEntries
        Array of index entries to score. The properties read from each entry are
        Content, Tokens, Title, Source, Url, FixSummary and FixSteps. An empty
        array yields a warning and no results.

    .PARAMETER Top
        Number of top results to return.

    .OUTPUTS
        Array of scored results with confidence and match explanation. Each
        result exposes Title, Source, Url, Confidence, MatchedTokens,
        MatchReason, FixSummary, FixSteps and Snippet.
    #>
    [CmdletBinding()]
    [OutputType([array])]
    param(
        # AllowEmptyCollection: without it, Mandatory rejects @() during
        # parameter binding and the empty-input guards below never execute.
        [Parameter(Mandatory)]
        [AllowEmptyCollection()]
        [string[]]$QueryTokens,

        [Parameter(Mandatory)]
        [AllowEmptyCollection()]
        [array]$IndexEntries,

        [Parameter()]
        [int]$Top = 5
    )

    if ($QueryTokens.Count -eq 0) {
        Write-Warning "No query tokens provided for scoring."
        return @()
    }

    if ($IndexEntries.Count -eq 0) {
        Write-Warning "No index entries to score."
        return @()
    }

    $ignoreCase = [System.StringComparer]::OrdinalIgnoreCase

    # Distinct query tokens, first occurrence wins, original order preserved.
    # HashSet.Add returns $false for a token that was already seen.
    $seenTokens = New-Object 'System.Collections.Generic.HashSet[string]' -ArgumentList $ignoreCase
    $distinctTokens = @($QueryTokens | Where-Object { $seenTokens.Add($_) })

    # Loop-invariant: the query phrase compared against every title.
    $queryString = $QueryTokens -join ' '

    $scoredResults = New-Object 'System.Collections.Generic.List[object]'

    foreach ($entry in $IndexEntries) {
        $score = 0.0
        $matchedTokens = @()
        $matchReasons = @()

        # High-value: exact substring match in full content (dot-delimited identifiers only)
        foreach ($token in $distinctTokens) {
            if ($token -notlike '*.*') {
                continue
            }

            # Escape wildcard metacharacters so a token such as 'arr[0].name'
            # is matched literally instead of being read as a pattern.
            $literal = [System.Management.Automation.WildcardPattern]::Escape($token)
            if ($entry.Content -like "*$literal*") {
                $score += 10.0
                $matchedTokens += $token
                $matchReasons += "Exact identifier match: $token"
            }
        }

        # Medium-value: token overlap (Jaccard) over case-insensitive sets
        $entryTokens = $entry.Tokens
        if ($entryTokens) {
            $entrySet = New-Object 'System.Collections.Generic.HashSet[string]' -ArgumentList $ignoreCase
            foreach ($entryToken in $entryTokens) {
                [void]$entrySet.Add([string]$entryToken)
            }

            $intersection = @($distinctTokens | Where-Object { $entrySet.Contains($_) })

            # |A union B| = |A| + |B| - |A intersect B|
            $unionCount = $entrySet.Count + $distinctTokens.Count - $intersection.Count

            if ($unionCount -gt 0) {
                $jaccard = $intersection.Count / $unionCount
                $score += $jaccard * 5.0

                if ($intersection.Count -gt 0) {
                    $matchedTokens += $intersection
                    $matchReasons += "Token overlap: $($intersection.Count)/$($distinctTokens.Count) query tokens matched"
                }
            }
        }

        # Lower-value: fuzzy match on title
        if ($entry.Title) {
            $fuzzyScore = Invoke-FuzzyScore -String1 $queryString -String2 $entry.Title.ToLowerInvariant()
            if ($fuzzyScore -gt 0.6) {
                $score += $fuzzyScore * 2.0
                $matchReasons += "Fuzzy title match: $([math]::Round($fuzzyScore * 100))%"
            }
        }

        # Only include candidates with non-zero score
        if ($score -gt 0) {
            $confidence = [math]::Min(100, [math]::Round($score * 10))

            $scoredResults.Add([PSCustomObject]@{
                Title         = $entry.Title
                Source        = $entry.Source
                Url           = $entry.Url
                Confidence    = $confidence
                MatchedTokens = ($matchedTokens | Select-Object -Unique) -join ', '
                MatchReason   = $matchReasons -join ' | '
                FixSummary    = $entry.FixSummary
                FixSteps      = $entry.FixSteps
                Snippet       = if ($entry.Content.Length -gt 300) { $entry.Content.Substring(0, 300) + '...' } else { $entry.Content }
                RawScore      = $score
            })
        }
    }

    # Sort by score descending and return top N
    $rankedResults = @($scoredResults | Sort-Object -Property RawScore -Descending | Select-Object -First $Top)

    # RawScore is only a sort key; remove it from the objects handed to callers
    foreach ($result in $rankedResults) {
        $result.PSObject.Properties.Remove('RawScore')
    }

    return $rankedResults
}