MSIX.Evidence.ps1
|
# =============================================================================
# Unified evidence model + confidence scoring
# -----------------------------------------------------------------------------
# Every analyzer in the module (static scanner, heuristic finder, ProcMon
# parser, trace parser, PE-imports scanner, manifest cross-checker, ...)
# emits findings. Before this module, each analyzer used its own ad-hoc
# pscustomobject and the report was just a concatenation. That made three
# things hard:
#
#   1. Knowing whether the same problem was detected by multiple analyzers
#      ("the package writes to install dir" via static + Procmon + trace
#      should be ONE finding with three pieces of evidence, not three
#      separate findings).
#   2. Confidence scoring — every finding was binary, even when one analyzer
#      was 95% sure and another was 30% sure.
#   3. Auditability — operators couldn't tell WHICH signal triggered an
#      autofix, only that "the autofix fired".
#
# This file introduces a canonical evidence-bearing finding shape, math
# for combining evidence into a single confidence score, and helpers for
# merging findings across analyzers.
#
# Backwards-compatible by design: New-MsixFinding returns a pscustomobject
# that has every legacy field (Severity, Category, Symptom, Recommendation,
# Evidence, AppId) PLUS the new fields (Confidence, EvidenceItems[]). All
# existing call sites continue to work unchanged. ConvertTo-MsixFinding wraps
# a legacy pscustomobject with default weights so the orchestrator can
# normalise the whole report without touching 86 emission sites.
#
# Confidence math: probabilistic OR with a saturating ceiling at 1.
#     confidence = 1 - product(1 - w_i)   for each evidence item w_i
# Rationale: independent signals reinforce each other but never reach
# certainty unless one of them IS certain. Three weight-0.5 signals
# combine to 1 - 0.5^3 = 0.875 (moderate-high). One weight-0.95 signal
# alone reaches 0.95. Two weight-0.3 signals combine to 1 - 0.7^2 = 0.51:
# just over the default report threshold of 0.50 but well under the autofix
# default threshold of 0.85, which is correct — two weak signals may be
# worth surfacing but shouldn't justify mutation.
# =============================================================================

# Per-source default weights. Tuned conservatively — operators can override
# any individual evidence item by passing -Weight on Add-MsixEvidence.
$script:MsixEvidenceDefaultWeights = @{
    'procmon'         = 0.9   # observed at runtime; near-certain
    'trace-fixup'     = 0.9   # observed via PSF TraceFixup; near-certain
    'manifest'        = 0.8   # declared in AppxManifest; high confidence
    'registry-dat'    = 0.8   # observed in package's Registry.dat
    'pe-imports'      = 0.6   # PE imports suggest, don't prove
    'pe-strings'      = 0.5   # strings in the binary
    'filesystem'      = 0.5   # file-existence heuristic
    'static-analysis' = 0.5   # static scan finding
    'manifest-cross'  = 0.7   # cross-check against another manifest element
    'heuristic'       = 0.4   # weak generic signal
    'user-supplied'   = 1.0   # the operator passed it in by hand
}

# Per-severity default weights used by ConvertTo-MsixFinding when a legacy
# finding has no evidence items. Maps to roughly the same scale as the
# per-source table above. The keys must stay in sync with the ValidateSet on
# New-MsixFinding -Severity: ConvertTo-MsixFinding uses this table to decide
# whether a legacy severity label is one New-MsixFinding will accept.
$script:MsixEvidenceSeverityWeights = @{
    'Error'   = 0.95
    'Warning' = 0.70
    'Info'    = 0.40
    'Debug'   = 0.20
}

# Threshold gates. Operators can override per-call on Invoke-MsixAutoFixFromAnalysis.
#   >= MinConfidenceAutoFix : autofix fires
#   >= MinConfidenceReport  : surfaced as Recommendation only
#   <  MinConfidenceReport  : debug-level (suppressed by default)
$script:MsixEvidenceDefaultConfidenceAutoFix = 0.85
$script:MsixEvidenceDefaultConfidenceReport  = 0.50

function Get-MsixFindingConfidence {
    <#
    .SYNOPSIS
        Computes the roll-up confidence of a finding from its EvidenceItems[]
        using probabilistic OR.

    .DESCRIPTION
        confidence = 1 - product(1 - w_i)

        Returns 0.0 for findings with no evidence items, 1.0 if any single
        evidence item has weight 1.0. Clamps each weight to [0,1] before
        combining so a malformed entry can't push the result negative or
        above 1.0. A NaN weight is treated as 0 (it would otherwise slip past
        both clamp comparisons and turn the whole result into NaN). Null
        entries in EvidenceItems are ignored.

        Independent of Severity: severity is the analyst's classification of
        impact, not the analyst's confidence that the impact exists.

    .PARAMETER Finding
        A finding object (the shape returned by New-MsixFinding) or any object
        that exposes an EvidenceItems array of items with a numeric .Weight
        property.

    .OUTPUTS
        [double] in [0.0, 1.0], rounded to 4 decimal places.
    #>
    [CmdletBinding()]
    [OutputType([double])]
    param(
        [Parameter(Mandatory, ValueFromPipeline)]
        $Finding
    )

    process {
        # Running product of (1 - w_i). Starts at 1.0 so a finding with no
        # usable evidence falls out as 1 - 1 = 0.0 without a special case.
        $compound = 1.0

        foreach ($e in @($Finding.EvidenceItems)) {
            if ($null -eq $e) { continue }

            $w = [double]$e.Weight

            # NaN compares false against everything, so it has to be tested
            # explicitly before the range clamp.
            if ([double]::IsNaN($w) -or $w -lt 0) {
                $w = 0.0
            }
            elseif ($w -gt 1) {
                $w = 1.0
            }

            $compound *= (1.0 - $w)
        }

        return [math]::Round(1.0 - $compound, 4)
    }
}

function New-MsixFinding {
    <#
    .SYNOPSIS
        Constructs a finding in the canonical evidence-graph shape.

    .DESCRIPTION
        Returns a pscustomobject with both the legacy fields (Severity,
        Category, Symptom, Recommendation, Evidence, AppId) so existing
        consumers stay green AND the new fields (Confidence, EvidenceItems[])
        so future consumers can pivot on per-source provenance and confidence
        scores.

        Initial evidence items can be supplied via -EvidenceItems; they will
        be passed straight through to Get-MsixFindingConfidence to compute the
        initial Confidence. Use Add-MsixEvidence to add more items after
        construction.

    .PARAMETER Category
        Stable identifier — the same string SARIF emits as ruleId (prefixed
        with 'MSIX.'). Examples: 'WorkingDirectory',
        'ManifestFix:FileSystemWriteVirtualization', 'ShellExt'.

    .PARAMETER Severity
        Analyst's classification of impact. One of Error / Warning / Info /
        Debug.

    .PARAMETER Symptom
        Single human-readable sentence describing what was observed.

    .PARAMETER Recommendation
        Operator-facing remediation hint (cmdlet name + args, typically).
        Surfaces in SARIF result.properties.recommendation.

    .PARAMETER Evidence
        Legacy evidence string (single, free-form). Retained for the
        pscustomobject projection. Per-source structured evidence belongs in
        -EvidenceItems instead.

    .PARAMETER AppId
        The Application/@Id this finding applies to, when scoped to a single
        Application. $null for package-wide findings.

    .PARAMETER EvidenceItems
        Array of evidence entries (hashtables/pscustomobjects). Each entry
        should have at least Source and Weight; additional per-source
        properties (Path, Result, FilePath, etc.) ride along.

    .OUTPUTS
        [pscustomobject] PSTypeName 'MsixFinding'.
    #>
    [CmdletBinding()]
    [OutputType([pscustomobject])]
    [Diagnostics.CodeAnalysis.SuppressMessageAttribute(
        'PSUseShouldProcessForStateChangingFunctions', '',
        Justification = 'Pure constructor — returns a pscustomobject, no IO, no global state change. PSSA flags any New-* verb without seeing the body.')]
    param(
        [Parameter(Mandatory)]
        [string]$Category,

        [Parameter(Mandatory)]
        [ValidateSet('Error','Warning','Info','Debug')]
        [string]$Severity,

        [Parameter(Mandatory)]
        [string]$Symptom,

        [string]$Recommendation,
        [string]$Evidence,
        [string]$AppId,
        [object[]]$EvidenceItems = @()
    )

    $finding = [pscustomobject]@{
        PSTypeName     = 'MsixFinding'

        # Legacy fields kept verbatim so every existing consumer (SARIF,
        # autofix orchestrator, Get-MsixCompatibilityReport report) reads
        # the new shape without code changes.
        Severity       = $Severity
        Category       = $Category
        Symptom        = $Symptom
        Recommendation = $Recommendation
        Evidence       = $Evidence
        AppId          = $AppId

        # New fields — additive. Force the cast to [object[]] so PowerShell
        # doesn't unroll a one-element list to a scalar (breaks .Count).
        Confidence     = 0.0
        EvidenceItems  = [object[]] @($EvidenceItems)
    }

    # Confidence is derived from the evidence list, never supplied directly.
    $finding.Confidence = Get-MsixFindingConfidence -Finding $finding
    return $finding
}

function Add-MsixEvidence {
    <#
    .SYNOPSIS
        Appends an evidence item to a finding and recomputes Confidence.

    .DESCRIPTION
        Mutates the finding in place. The supplied -Source must be a known
        source name (see $script:MsixEvidenceDefaultWeights) OR the caller
        must pass an explicit -Weight to override the lookup. When neither
        holds, the item gets a weak default weight of 0.3.

        Extra named parameters are stored as properties of the evidence item
        alongside Source and Weight — e.g. -Path / -Result / -Operation for
        ProcMon evidence, -Import / -Module for PE-imports evidence.

    .PARAMETER Finding
        Finding object from New-MsixFinding (or a legacy finding promoted via
        ConvertTo-MsixFinding).

    .PARAMETER Source
        Provenance label. Should be one of: procmon, trace-fixup, manifest,
        registry-dat, pe-imports, pe-strings, filesystem, static-analysis,
        manifest-cross, heuristic, user-supplied.

    .PARAMETER Weight
        Override the default weight. Use only when you have a good reason —
        e.g. "this static-analysis signal happens to be unambiguous in this
        specific case" or "this Procmon hit is low confidence because it's
        intermittent".

    .PARAMETER Properties
        Hashtable of extra per-source properties to attach to the evidence
        item. Anything goes here; SARIF picks it up verbatim.

    .OUTPUTS
        The same finding object (for fluent chaining:
        New-MsixFinding ... | Add-MsixEvidence ...).
    #>
    [CmdletBinding()]
    [OutputType([pscustomobject])]
    param(
        [Parameter(Mandatory, ValueFromPipeline)]
        $Finding,

        [Parameter(Mandatory)]
        [string]$Source,

        # When -Weight isn't passed, we look up the per-source default
        # (or 0.3 for an unknown source). When it IS passed, we honour
        # whatever value the caller gave — INCLUDING negative values,
        # which are clamped to 0 inside Get-MsixFindingConfidence (the
        # "out of range" test relies on that).
        # Detecting "passed vs not passed" via $PSBoundParameters so a
        # legitimate -Weight -2 doesn't accidentally collide with a
        # sentinel.
        [double]$Weight,

        [hashtable]$Properties
    )

    process {
        if (-not $PSBoundParameters.ContainsKey('Weight')) {
            if ($script:MsixEvidenceDefaultWeights.ContainsKey($Source)) {
                $Weight = $script:MsixEvidenceDefaultWeights[$Source]
            }
            else {
                $Weight = 0.3   # unknown source: weak default, never 0
            }
        }

        # Ordered so Source and Weight always lead the projected object.
        $entry = [ordered]@{
            Source = $Source
            Weight = [double]$Weight
        }
        if ($Properties) {
            foreach ($k in $Properties.Keys) {
                $entry[$k] = $Properties[$k]
            }
        }

        # Assign as a real [object[]]. The forced cast keeps PowerShell
        # from unwrapping a single-element array, which would otherwise
        # collapse "one evidence item" to a scalar and break .Count
        # checks downstream.
        $Finding.EvidenceItems = [object[]] (@($Finding.EvidenceItems) + [pscustomobject]$entry)
        $Finding.Confidence    = Get-MsixFindingConfidence -Finding $Finding
        return $Finding
    }
}

function ConvertTo-MsixFinding {
    <#
    .SYNOPSIS
        Converts a legacy pscustomobject finding into a MsixFinding without
        touching the original analyzer that emitted it.

    .DESCRIPTION
        The orchestrator (Get-MsixCompatibilityReport) calls this on every
        finding before they go into the report, so consumers downstream see a
        uniform shape.

        Legacy findings with no structured evidence get a single synthetic
        evidence item whose Source = the analyzer-origin guess (defaults to
        'static-analysis') and whose Weight matches the per-severity default
        table. This way:

          - Error findings come out at confidence 0.95
          - Warning findings at 0.70
          - Info findings at 0.40

        ... matching what the existing autofix orchestrator implicitly treated
        as "fire" vs "recommend" vs "ignore". No behavioural change for
        callers that don't ask for the confidence field.

        A missing or unrecognised Severity is normalised to 'Info' (weight
        0.40) so the finding can still be promoted; New-MsixFinding only
        accepts Error / Warning / Info / Debug. When an unrecognised label is
        replaced, the original text is kept on the synthetic evidence item as
        LegacySeverity.

        If the legacy finding already had an EvidenceItems[] array, this is a
        no-op (the finding is already promoted).

    .PARAMETER Finding
        Legacy pscustomobject finding (Severity/Category/Symptom/...).

    .PARAMETER Source
        Source label to attach to the synthetic evidence item. Defaults to
        'static-analysis' which is the most common origin.

    .OUTPUTS
        [pscustomobject] PSTypeName 'MsixFinding'.
    #>
    [CmdletBinding()]
    [OutputType([pscustomobject])]
    param(
        [Parameter(Mandatory, ValueFromPipeline)]
        $Finding,

        [string]$Source = 'static-analysis'
    )

    process {
        if (-not $Finding) { return $null }

        # Already promoted — don't re-wrap.
        if ($Finding.PSObject.TypeNames -contains 'MsixFinding' -and
            $Finding.PSObject.Properties.Match('EvidenceItems').Count -gt 0) {
            return $Finding
        }

        # Normalise the severity BEFORE calling New-MsixFinding. Its
        # ValidateSet rejects anything outside the four known labels, so
        # passing an unknown one through would fail the whole promotion
        # instead of falling back to a weak default.
        $legacySeverity = [string]$Finding.Severity
        $severity       = $legacySeverity
        if (-not $script:MsixEvidenceSeverityWeights.ContainsKey($severity)) {
            $severity = 'Info'
        }
        $weight = $script:MsixEvidenceSeverityWeights[$severity]

        # NOTE(review): Category and Symptom are Mandatory [string] parameters
        # on New-MsixFinding, which appears to reject empty strings at
        # binding time — a legacy finding missing either field would still
        # fail to promote here. Confirm whether any emitter can omit them.
        $f = New-MsixFinding `
            -Category       ([string]$Finding.Category) `
            -Severity       $severity `
            -Symptom        ([string]$Finding.Symptom) `
            -Recommendation ([string]$Finding.Recommendation) `
            -Evidence       ([string]$Finding.Evidence) `
            -AppId          ([string]$Finding.AppId)

        $props = @{}
        if ($Finding.Evidence) {
            $props['EvidenceText'] = [string]$Finding.Evidence
        }
        if ($legacySeverity -and $legacySeverity -ne $severity) {
            # Keep the label we had to replace so the remap stays auditable.
            $props['LegacySeverity'] = $legacySeverity
        }

        Add-MsixEvidence -Finding $f -Source $Source -Weight $weight -Properties $props | Out-Null

        # Tag the synthetic-evidence wrapper so the orchestrator can tell
        # "this came through the legacy adapter; let it through regardless
        # of confidence" apart from "this is a low-weight new-shape
        # finding the operator intentionally produced; respect the gate".
        $f | Add-Member -NotePropertyName 'PromotedFromLegacy' -NotePropertyValue $true -Force
        return $f
    }
}

function Merge-MsixFinding {
    <#
    .SYNOPSIS
        Merges multiple MsixFinding objects covering the same Category + AppId
        into one finding by combining their evidence lists.

    .DESCRIPTION
        Two findings are considered "the same finding" when they have the same
        Category AND the same AppId (null counts as a match). All evidence
        items from the duplicates are concatenated onto the primary finding
        (the first one seen for that key, which is mutated in place) and
        Confidence is recomputed. Evidence items sharing the same Source and
        Path are treated as duplicates and kept once. Severity stays as the
        maximum of the two (Error > Warning > Info > Debug); Symptom and
        Recommendation come from the higher-severity finding (so the "best"
        wording survives).

        This is the function the orchestrator runs after concatenating
        analyzer outputs. Static + Procmon + heuristic all detecting the same
        install-dir-write issue collapse from three rows into one row with
        three pieces of evidence.

        Output order is the order in which each (Category, AppId) key was
        first seen, so the merged report is stable from run to run.

    .PARAMETER Findings
        Array of MsixFinding (or promotable legacy findings — they are
        promoted on the fly). Null entries and an empty array are accepted
        and contribute nothing.

    .OUTPUTS
        [object[]] de-duplicated MsixFinding array.
    #>
    [CmdletBinding()]
    [OutputType([object[]])]
    param(
        [Parameter(Mandatory, ValueFromPipeline)]
        [AllowNull()]
        [AllowEmptyCollection()]
        [object[]]$Findings
    )

    begin {
        $severityRank = @{ 'Error' = 3; 'Warning' = 2; 'Info' = 1; 'Debug' = 0 }

        # Ordered so the result keeps first-seen order; a plain hashtable
        # would hand findings back in arbitrary hash order.
        $byKey = [ordered]@{}
    }

    process {
        # Promote any not-yet-promoted entries, then fold each into $byKey
        # keyed by (Category, AppId). Without a real process block here,
        # pipeline input would drop every batch except the last one.
        foreach ($raw in $Findings) {
            if (-not $raw) { continue }

            $f = if ($raw.PSObject.TypeNames -contains 'MsixFinding' -and
                     $raw.PSObject.Properties.Match('EvidenceItems').Count -gt 0) {
                $raw
            }
            else {
                ConvertTo-MsixFinding -Finding $raw
            }
            if (-not $f) { continue }

            $appId = if ($null -ne $f.AppId) { $f.AppId } else { '' }
            $key   = '{0}|{1}' -f $f.Category, $appId

            if (-not $byKey.Contains($key)) {
                $byKey[$key] = $f
                continue
            }

            $primary = $byKey[$key]

            # Concatenate evidence (de-dupe identical Source+Path tuples).
            # Evidence items may be hashtables or pscustomobjects; hashtable
            # keys are not visible through PSObject.Properties, so Path is
            # read per shape. Otherwise every hashtable item from one source
            # would look identical and all but the first would be dropped.
            $seen    = New-Object 'System.Collections.Generic.HashSet[string]'
            $deduped = foreach ($item in (@($primary.EvidenceItems) + @($f.EvidenceItems))) {
                if ($null -eq $item) { continue }

                if ($item -is [System.Collections.IDictionary]) {
                    $itemPath = $item['Path']
                }
                else {
                    $pathProp = $item.PSObject.Properties['Path']
                    $itemPath = if ($pathProp) { $pathProp.Value } else { $null }
                }

                # HashSet.Add returns $false for a signature already present.
                if ($seen.Add(('{0}|{1}' -f $item.Source, $itemPath))) {
                    $item
                }
            }

            $primary.EvidenceItems = [object[]] @($deduped)
            $primary.Confidence    = Get-MsixFindingConfidence -Finding $primary

            # Promote severity to the higher rank. Unknown labels rank as
            # Info; the [string] cast keeps a null Severity from throwing
            # inside ContainsKey.
            $pSeverity = [string]$primary.Severity
            $fSeverity = [string]$f.Severity
            $pRank = if ($severityRank.ContainsKey($pSeverity)) { $severityRank[$pSeverity] } else { 1 }
            $fRank = if ($severityRank.ContainsKey($fSeverity)) { $severityRank[$fSeverity] } else { 1 }

            if ($fRank -gt $pRank) {
                $primary.Severity       = $f.Severity
                $primary.Symptom        = $f.Symptom
                $primary.Recommendation = $f.Recommendation
            }
        }
    }

    end {
        return @($byKey.Values)
    }
}

function ConvertTo-MsixLegacyFinding {
    <#
    .SYNOPSIS
        Strips the evidence list and confidence field, returning the old-shape
        pscustomobject. Useful for callers that haven't been updated and still
        expect the original fields only.

    .DESCRIPTION
        New-MsixFinding already includes every legacy field on the object, so
        this is mostly a passthrough; the explicit conversion exists so
        callers can request the narrowed shape without relying on
        PowerShell's quiet property-tolerance.

    .PARAMETER Finding
        The finding to narrow. Only Severity, Category, Symptom,
        Recommendation, Evidence and AppId are copied.

    .OUTPUTS
        [pscustomobject] carrying the six legacy fields only.
    #>
    [CmdletBinding()]
    [OutputType([pscustomobject])]
    param(
        [Parameter(Mandatory, ValueFromPipeline)]
        $Finding
    )

    process {
        [pscustomobject]@{
            Severity       = $Finding.Severity
            Category       = $Finding.Category
            Symptom        = $Finding.Symptom
            Recommendation = $Finding.Recommendation
            Evidence       = $Finding.Evidence
            AppId          = $Finding.AppId
        }
    }
}