msix

0.70.5

MSIX.Evidence.ps1

                                # =============================================================================

# Unified evidence model + confidence scoring

# -----------------------------------------------------------------------------

# Every analyzer in the module (static scanner, heuristic finder, ProcMon

# parser, trace parser, PE-imports scanner, manifest cross-checker, ...)

# emits findings. Before this module, each analyzer used its own ad-hoc

# pscustomobject and the report was just a concatenation. That made three

# things hard:

#

#   1. Knowing whether the same problem was detected by multiple analyzers

#      ("the package writes to install dir" via static + Procmon + trace

#      should be ONE finding with three pieces of evidence, not three

#      separate findings).

#   2. Confidence scoring — every finding was binary, even when one analyzer

#      was 95% sure and another was 30% sure.

#   3. Auditability — operators couldn't tell WHICH signal triggered an

#      autofix, only that "the autofix fired".

#

# This file introduces a canonical evidence-bearing finding shape, math

# for combining evidence into a single confidence score, and helpers for

# merging findings across analyzers.

#

# Backwards-compatible by design: New-MsixFinding returns a pscustomobject

# that has every legacy field (Severity, Category, Symptom, Recommendation,

# Evidence, AppId) PLUS the new fields (Confidence, EvidenceItems[]). All

# existing call sites continue to work unchanged. ConvertTo-MsixFinding wraps

# a legacy pscustomobject with default weights so the orchestrator can

# normalise the whole report without touching 86 emission sites.

#

# Confidence math: probabilistic OR with a saturating ceiling at 1.

#   confidence = 1 - product(1 - w_i) for each evidence item w_i

# Rationale: independent signals reinforce each other but never reach

# certainty unless one of them IS certain. Three weight-0.5 signals

# combine to 1 - 0.5^3 = 0.875 (moderate-high). One weight-0.95 signal

# alone reaches 0.95. Two weight-0.3 signals combine to 0.51 (just under

# the autofix default threshold of 0.85, which is correct — two weak

# signals shouldn't justify mutation).

# =============================================================================

# Per-source default weights. Tuned conservatively — operators can override

# any individual evidence item by passing -Weight on Add-MsixEvidence.

$script:MsixEvidenceDefaultWeights = @{

    'procmon'         = 0.9    # observed at runtime; near-certain

    'trace-fixup'     = 0.9    # observed via PSF TraceFixup; near-certain

    'manifest'        = 0.8    # declared in AppxManifest; high confidence

    'registry-dat'    = 0.8    # observed in package's Registry.dat

    'pe-imports'      = 0.6    # PE imports suggest, don't prove

    'pe-strings'      = 0.5    # strings in the binary

    'filesystem'      = 0.5    # file-existence heuristic

    'static-analysis' = 0.5    # static scan finding

    'manifest-cross'  = 0.7    # cross-check against another manifest element

    'heuristic'       = 0.4    # weak generic signal

    'user-supplied'   = 1.0    # the operator passed it in by hand

}

# Per-severity default weights used by ConvertTo-MsixFinding when a legacy

# finding has no evidence items. Maps to roughly the same scale as the

# per-source table above.

$script:MsixEvidenceSeverityWeights = @{

    'Error'   = 0.95

    'Warning' = 0.70

    'Info'    = 0.40

    'Debug'   = 0.20

}

# Threshold gates. Operators can override per-call on Invoke-MsixAutoFixFromAnalysis.

#   >= MinConfidenceAutoFix : autofix fires

#   >= MinConfidenceReport  : surfaced as Recommendation only

#   <  MinConfidenceReport  : debug-level (suppressed by default)

$script:MsixEvidenceDefaultConfidenceAutoFix = 0.85

$script:MsixEvidenceDefaultConfidenceReport  = 0.50

function Get-MsixFindingConfidence {

    <#

    .SYNOPSIS

        Computes the roll-up confidence of a finding from its EvidenceItems[]

        using probabilistic OR.

    .DESCRIPTION

        confidence = 1 - product(1 - w_i)

        Returns 0.0 for findings with no evidence items, 1.0 if any single

        evidence item has weight 1.0. Clamps each weight to [0,1] before

        combining so a malformed entry can't push the result negative or

        above 1.0.

        Independent of Severity: severity is the analyst's classification

        of impact, not the analyst's confidence that the impact exists.

    .PARAMETER Finding

        A finding object (the shape returned by New-MsixFinding) or any

        object that exposes an EvidenceItems array of items with a

        numeric .Weight property.

    .OUTPUTS

        [double] in [0.0, 1.0]

    #>

    [CmdletBinding()]

    [OutputType([double])]

    param([Parameter(Mandatory, ValueFromPipeline)] $Finding)

    process {

        $items = @($Finding.EvidenceItems)

        if (-not $items -or $items.Count -eq 0) { return 0.0 }

        $compound = 1.0

        foreach ($e in $items) {

            $w = [double]$e.Weight

            if ($w -lt 0) { $w = 0 } elseif ($w -gt 1) { $w = 1 }

            $compound *= (1.0 - $w)

        }

        return [math]::Round(1.0 - $compound, 4)

    }

}

function New-MsixFinding {

    <#

    .SYNOPSIS

        Constructs a finding in the canonical evidence-graph shape.

    .DESCRIPTION

        Returns a pscustomobject with both the legacy fields (Severity,

        Category, Symptom, Recommendation, Evidence, AppId) so existing

        consumers stay green AND the new fields (Confidence,

        EvidenceItems[]) so future consumers can pivot on per-source

        provenance and confidence scores.

        Initial evidence items can be supplied via -EvidenceItems; they

        will be passed straight through to Get-MsixFindingConfidence to

        compute the initial Confidence. Use Add-MsixEvidence to add more

        items after construction.

    .PARAMETER Category

        Stable identifier — the same string SARIF emits as ruleId

        (prefixed with 'MSIX.'). Examples: 'WorkingDirectory',

        'ManifestFix:FileSystemWriteVirtualization', 'ShellExt'.

    .PARAMETER Severity

        Analyst's classification of impact. One of Error / Warning /

        Info / Debug.

    .PARAMETER Symptom

        Single human-readable sentence describing what was observed.

    .PARAMETER Recommendation

        Operator-facing remediation hint (cmdlet name + args, typically).

        Surfaces in SARIF result.properties.recommendation.

    .PARAMETER Evidence

        Legacy evidence string (single, free-form). Retained for the

        pscustomobject projection. Per-source structured evidence

        belongs in -EvidenceItems instead.

    .PARAMETER AppId

        The Application/@Id this finding applies to, when scoped to a

        single Application. $null for package-wide findings.

    .PARAMETER EvidenceItems

        Array of evidence entries (hashtables/pscustomobjects). Each

        entry should have at least Source and Weight; additional

        per-source properties (Path, Result, FilePath, etc.) ride along.

    .OUTPUTS

        [pscustomobject] PSTypeName 'MsixFinding'.

    #>

    [CmdletBinding()]

    [OutputType([pscustomobject])]

    [Diagnostics.CodeAnalysis.SuppressMessageAttribute(

        'PSUseShouldProcessForStateChangingFunctions', '',

        Justification = 'Pure constructor — returns a pscustomobject, no IO, no global state change. PSSA flags any New-* verb without seeing the body.')]

    param(

        [Parameter(Mandatory)] [string]$Category,

        [Parameter(Mandatory)] [ValidateSet('Error','Warning','Info','Debug')] [string]$Severity,

        [Parameter(Mandatory)] [string]$Symptom,

        [string]$Recommendation,

        [string]$Evidence,

        [string]$AppId,

        [object[]]$EvidenceItems = @()

    )

    $finding = [pscustomobject]@{

        PSTypeName     = 'MsixFinding'

        # Legacy fields kept verbatim so every existing consumer (SARIF,

        # autofix orchestrator, Get-MsixCompatibilityReport report) reads

        # the new shape without code changes.

        Severity       = $Severity

        Category       = $Category

        Symptom        = $Symptom

        Recommendation = $Recommendation

        Evidence       = $Evidence

        AppId          = $AppId

        # New fields — additive. Force the cast to [object[]] so PowerShell

        # doesn't unroll a one-element list to a scalar (breaks .Count).

        Confidence     = 0.0

        EvidenceItems  = [object[]] @($EvidenceItems)

    }

    $finding.Confidence = Get-MsixFindingConfidence -Finding $finding

    return $finding

}

function Add-MsixEvidence {

    <#

    .SYNOPSIS

        Appends an evidence item to a finding and recomputes Confidence.

    .DESCRIPTION

        Mutates the finding in place. The supplied -Source must be a known

        source name (see $script:MsixEvidenceDefaultWeights) OR the caller

        must pass an explicit -Weight to override the lookup.

        Extra named parameters are stored as properties of the evidence

        item alongside Source and Weight — e.g. -Path / -Result / -Operation

        for ProcMon evidence, -Import / -Module for PE-imports evidence.

    .PARAMETER Finding

        Finding object from New-MsixFinding (or a legacy finding promoted

        via ConvertTo-MsixFinding).

    .PARAMETER Source

        Provenance label. Should be one of: procmon, trace-fixup,

        manifest, registry-dat, pe-imports, pe-strings, filesystem,

        static-analysis, manifest-cross, heuristic, user-supplied.

    .PARAMETER Weight

        Override the default weight. Use only when you have a good reason

        — e.g. "this static-analysis signal happens to be unambiguous in

        this specific case" or "this Procmon hit is low confidence because

        it's intermittent".

    .PARAMETER Properties

        Hashtable of extra per-source properties to attach to the evidence

        item. Anything goes here; SARIF picks it up verbatim.

    .OUTPUTS

        The same finding object (for fluent chaining: New-MsixFinding ...

        | Add-MsixEvidence ...).

    #>

    [CmdletBinding()]

    [OutputType([pscustomobject])]

    param(

        [Parameter(Mandatory, ValueFromPipeline)] $Finding,

        [Parameter(Mandatory)] [string]$Source,

        # When -Weight isn't passed, we look up the per-source default

        # (or 0.3 for an unknown source). When it IS passed, we honour

        # whatever value the caller gave — INCLUDING negative values,

        # which are clamped to 0 inside Get-MsixFindingConfidence (the

        # "out of range" test relies on that).

        # Detecting "passed vs not passed" via $PSBoundParameters so a

        # legitimate -Weight -2 doesn't accidentally collide with a

        # sentinel.

        [double]$Weight,

        [hashtable]$Properties

    )

    process {

        if (-not $PSBoundParameters.ContainsKey('Weight')) {

            if ($script:MsixEvidenceDefaultWeights.ContainsKey($Source)) {

                $Weight = $script:MsixEvidenceDefaultWeights[$Source]

            } else {

                $Weight = 0.3   # unknown source: weak default, never 0

            }

        }

        $entry = [ordered]@{ Source = $Source; Weight = [double]$Weight }

        if ($Properties) {

            foreach ($k in $Properties.Keys) { $entry[$k] = $Properties[$k] }

        }

        # Assign as a real [object[]]. The forced cast keeps PowerShell

        # from unwrapping a single-element array, which would otherwise

        # collapse "one evidence item" to a scalar and break .Count

        # checks downstream.

        $Finding.EvidenceItems = [object[]] (@($Finding.EvidenceItems) + [pscustomobject]$entry)

        $Finding.Confidence    = Get-MsixFindingConfidence -Finding $Finding

        return $Finding

    }

}

function ConvertTo-MsixFinding {

    <#

    .SYNOPSIS

        Converts a legacy pscustomobject finding into a MsixFinding without

        touching the original analyzer that emitted it.

    .DESCRIPTION

        The orchestrator (Get-MsixCompatibilityReport) calls this on every

        finding before they go into the report, so consumers downstream see

        a uniform shape. Legacy findings with no structured evidence get a

        single synthetic evidence item whose Source = the analyzer-origin

        guess (defaults to 'static-analysis') and whose Weight matches the

        per-severity default table. This way:

          - Error findings come out at confidence 0.95

          - Warning findings at 0.70

          - Info findings at 0.40

        ... matching what the existing autofix orchestrator implicitly

        treated as "fire" vs "recommend" vs "ignore". No behavioural change

        for callers that don't ask for the confidence field.

        If the legacy finding already had an EvidenceItems[] array, this is

        a no-op (the finding is already promoted).

    .PARAMETER Finding

        Legacy pscustomobject finding (Severity/Category/Symptom/...).

    .PARAMETER Source

        Source label to attach to the synthetic evidence item. Defaults

        to 'static-analysis' which is the most common origin.

    .OUTPUTS

        [pscustomobject] PSTypeName 'MsixFinding'.

    #>

    [CmdletBinding()]

    [OutputType([pscustomobject])]

    param(

        [Parameter(Mandatory, ValueFromPipeline)] $Finding,

        [string]$Source = 'static-analysis'

    )

    process {

        if (-not $Finding) { return $null }

        # Already promoted — don't re-wrap.

        if ($Finding.PSObject.TypeNames -contains 'MsixFinding' -and `

            $Finding.PSObject.Properties.Match('EvidenceItems').Count -gt 0) {

            return $Finding

        }

        $severity = if ($Finding.Severity) { [string]$Finding.Severity } else { 'Info' }

        $weight   = if ($script:MsixEvidenceSeverityWeights.ContainsKey($severity)) {

            $script:MsixEvidenceSeverityWeights[$severity]

        } else { 0.4 }

        $f = New-MsixFinding `

            -Category       ([string]$Finding.Category) `

            -Severity       $severity `

            -Symptom        ([string]$Finding.Symptom) `

            -Recommendation ([string]$Finding.Recommendation) `

            -Evidence       ([string]$Finding.Evidence) `

            -AppId          ([string]$Finding.AppId)

        $props = @{}

        if ($Finding.Evidence) { $props['EvidenceText'] = [string]$Finding.Evidence }

        Add-MsixEvidence -Finding $f -Source $Source -Weight $weight -Properties $props | Out-Null

        # Tag the synthetic-evidence wrapper so the orchestrator can tell

        # "this came through the legacy adapter; let it through regardless

        # of confidence" apart from "this is a low-weight new-shape

        # finding the operator intentionally produced; respect the gate".

        $f | Add-Member -NotePropertyName 'PromotedFromLegacy' -NotePropertyValue $true -Force

        return $f

    }

}

function Merge-MsixFinding {

    <#

    .SYNOPSIS

        Merges multiple MsixFinding objects covering the same Category +

        AppId into one finding by combining their evidence lists.

    .DESCRIPTION

        Two findings are considered "the same finding" when they have the

        same Category AND the same AppId (null counts as a match). All

        evidence items from the duplicates are concatenated onto the

        primary finding and Confidence is recomputed.

        Severity stays as the maximum of the two (Error > Warning > Info >

        Debug); Symptom and Recommendation come from the higher-severity

        finding (so the "best" wording survives).

        This is the function the orchestrator runs after concatenating

        analyzer outputs. Static + Procmon + heuristic all detecting the

        same install-dir-write issue collapse from three rows into one

        row with three pieces of evidence.

    .PARAMETER Findings

        Array of MsixFinding (or promotable legacy findings — they are

        promoted on the fly).

    .OUTPUTS

        [object[]] de-duplicated MsixFinding array.

    #>

    [CmdletBinding()]

    [OutputType([object[]])]

    param([Parameter(Mandatory, ValueFromPipeline)] [object[]]$Findings)

    begin {

        $severityRank = @{ 'Error'=3; 'Warning'=2; 'Info'=1; 'Debug'=0 }

        $byKey        = @{}

    }

    process {

        # Promote any not-yet-promoted entries, then fold each into $byKey

        # keyed by (Category, AppId). Without a real process block here,

        # pipeline input would drop every batch except the last one.

        foreach ($raw in $Findings) {

            if (-not $raw) { continue }

            $f = if ($raw.PSObject.TypeNames -contains 'MsixFinding' -and `

                     $raw.PSObject.Properties.Match('EvidenceItems').Count -gt 0) {

                $raw

            } else {

                ConvertTo-MsixFinding -Finding $raw

            }

            if (-not $f) { continue }

            $appId = if ($null -ne $f.AppId) { $f.AppId } else { '' }

            $key = ('{0}|{1}' -f $f.Category, $appId)

            if (-not $byKey.ContainsKey($key)) {

                $byKey[$key] = $f

                continue

            }

            $primary = $byKey[$key]

            # Concatenate evidence (de-dupe identical Source+Path tuples).

            $combined = @($primary.EvidenceItems) + @($f.EvidenceItems)

            $seen = New-Object System.Collections.Generic.HashSet[string]

            $deduped = @($combined | Where-Object {

                $sig = ('{0}|{1}' -f $_.Source, ($_.PSObject.Properties['Path'] | ForEach-Object Value))

                $seen.Add($sig)

            })

            $primary.EvidenceItems = [object[]]$deduped

            $primary.Confidence    = Get-MsixFindingConfidence -Finding $primary

            # Promote severity to the higher rank.

            $pRank = if ($severityRank.ContainsKey($primary.Severity)) { $severityRank[$primary.Severity] } else { 1 }

            $fRank = if ($severityRank.ContainsKey($f.Severity))       { $severityRank[$f.Severity] }       else { 1 }

            if ($fRank -gt $pRank) {

                $primary.Severity       = $f.Severity

                $primary.Symptom        = $f.Symptom

                $primary.Recommendation = $f.Recommendation

            }

        }

    }

    end {

        return @($byKey.Values)

    }

}

function ConvertTo-MsixLegacyFinding {

    <#

    .SYNOPSIS

        Strips the evidence list and confidence field, returning the

        old-shape pscustomobject. Useful for callers that haven't been

        updated and still expect the original fields only.

    .DESCRIPTION

        New-MsixFinding already includes every legacy field on the

        object, so this is mostly a passthrough; the explicit conversion

        exists so callers can request the narrowed shape without relying

        on PowerShell's quiet property-tolerance.

    #>

    [CmdletBinding()]

    [OutputType([pscustomobject])]

    param([Parameter(Mandatory, ValueFromPipeline)] $Finding)

    process {

        [pscustomobject]@{

            Severity       = $Finding.Severity

            Category       = $Finding.Category

            Symptom        = $Finding.Symptom

            Recommendation = $Finding.Recommendation

            Evidence       = $Finding.Evidence

            AppId          = $Finding.AppId

        }

    }

}