scripts/render-tool-output-audit.ps1
|
# Helper for issue #432a — render docs/tool-output-audit.md and # docs/tool-output-audit.json from audit-raw.json. [CmdletBinding()] param([string] $RepoRoot = (Split-Path $PSScriptRoot -Parent)) $ErrorActionPreference = 'Stop' $raw = Get-Content (Join-Path $RepoRoot 'audit-raw.json') -Raw | ConvertFrom-Json # Curated FindingRow schema (modules/shared/Schema.ps1 v2.2 — for filter only). $schemaSet = @( 'Id','Source','EntityId','EntityType','Title','RuleId','Compliant','ProvenanceRunId', 'Category','Severity','Detail','Remediation','ResourceId','LearnMoreUrl','Platform', 'SubscriptionId','SubscriptionName','ResourceGroup','ManagementGroupPath','Frameworks', 'Controls','Confidence','EvidenceCount','MissingDimensions','ProvenanceSource', 'ProvenanceRawRecordRef','ProvenanceTimestamp','Pillar','Impact','Effort','DeepLinkUrl', 'RemediationSnippets','EvidenceUris','BaselineTags','ScoreDelta','MitreTactics', 'MitreTechniques','EntityRefs','ToolVersion','SchemaVersion' ) # Envelope-level keys that are not per-finding semantic fields and therefore # are NOT candidates for FindingRow extension. $envelopeIgnore = @( 'Errors','Findings','GeneratedAt','RunId','TenantId','Provider','Scope','Tool', 'Status','SchemaVersion','Message','ExitCode','ContinuationToken','Body','Output', 'name','rg','path','params','Path','Average','Total','Count','Sum','Min','Max', 'Stderr','Stdout','Records','Items' ) function Format-FieldList { param([string[]] $Items, [int] $Max = 8) if (-not $Items -or $Items.Count -eq 0) { return '_(none detected)_' } $shown = $Items | Select-Object -First $Max $more = $Items.Count - $shown.Count $s = ($shown | ForEach-Object { "``$_``" }) -join ', ' if ($more -gt 0) { $s += " (+$more more)" } return $s } # --- Build markdown --- $sb = New-Object System.Text.StringBuilder [void]$sb.AppendLine('# Tool output fidelity audit (#432a)') [void]$sb.AppendLine() [void]$sb.AppendLine('> Track D / sub-task **#432a** of epic #427. Audit-first, doc-only, no schema changes. Input for **#432b** (FindingRow extension) and **#432c** (per-family adoption), both deferred post-window per Round 3 reconciliation.') [void]$sb.AppendLine() [void]$sb.AppendLine('## Methodology — audit-first, delta-only') [void]$sb.AppendLine() [void]$sb.AppendLine('This audit is **static** and **delta-only**. For every tool registered in `tools/tool-manifest.json` (the single source of truth) we:') [void]$sb.AppendLine() [void]$sb.AppendLine('1. Locate the wrapper (`modules/Invoke-<Tool>.ps1`) and normalizer (`modules/normalizers/Normalize-<Tool>.ps1`).') [void]$sb.AppendLine('2. Statically extract the property names emitted on raw / v1-envelope finding objects in the wrapper.') [void]$sb.AppendLine('3. Statically extract the `New-FindingRow` parameters bound (directly or via splat hashtable) in the normalizer.') [void]$sb.AppendLine('4. Cross-reference the v2.2 `FindingRow` schema in `modules/shared/Schema.ps1`.') [void]$sb.AppendLine('5. Diff (1) → (3) and classify each wrapper-emitted field as `preserved`, `suspected-dropped`, `confirmed-dropped`, or `n/a` (envelope/diagnostic).') [void]$sb.AppendLine() [void]$sb.AppendLine('Static analysis catches the majority of dropped fields, but per-tenant runtime payloads can include additional optional properties not visible to the script. Where confirmation requires actual tool execution against a live tenant we mark **`pending-real-tenant-run`** instead of **`complete`**. This is honest scope-flagging — #432b will only schema-add fields in the **`confirmed-dropped`** column built from the union of static analysis + the runtime-fixture pass that ships under #432c.') [void]$sb.AppendLine() [void]$sb.AppendLine('Sidecar machine-readable data: [`tool-output-audit.json`](./tool-output-audit.json).') [void]$sb.AppendLine() [void]$sb.AppendLine('## Tool inventory') [void]$sb.AppendLine() [void]$sb.AppendLine(('Total tools registered: **{0}** (enabled: **{1}**, disabled: **{2}**).' -f $raw.Count, ($raw | Where-Object Enabled).Count, ($raw | Where-Object { -not $_.Enabled }).Count)) [void]$sb.AppendLine() [void]$sb.AppendLine('| Tool | Provider | Scope | Wrapper file | Wrapper-preserved fields | Normalizer-preserved fields | Tool-emitted fields not preserved (suspected) | Audit status |') [void]$sb.AppendLine('| --- | --- | --- | --- | --- | --- | --- | --- |') $sidecar = New-Object System.Collections.Generic.List[object] foreach ($e in $raw) { $wrapperFile = if ($e.WrapperFile) { "``$($e.WrapperFile)``" } else { '_(none)_' } # Suspected-dropped = wrapper field that is neither in schema nor an # envelope/diagnostic key, and not present in normalizer output set. $candidates = $e.WrapperFields | Where-Object { $_ -and ($_ -notin $envelopeIgnore) -and ($_ -notin $schemaSet) -and ($_ -notin $e.NormalizerSchemaFields) } | Sort-Object -Unique $status = if (-not $e.Enabled) { 'disabled (skipped)' } elseif (-not $e.WrapperFile -or -not $e.NormalizerFile) { 'pending-real-tenant-run (post-processor; not a finding-emitting wrapper)' } elseif ($e.WrapperFields.Count -eq 0) { 'pending-real-tenant-run (wrapper uses dynamic finding shape; static extract empty)' } else { 'complete (static); pending-real-tenant-run for runtime confirmation' } $row = '| `{0}` | {1} | {2} | {3} | {4} | {5} | {6} | {7} |' -f ` $e.Tool, $e.Provider, $e.Scope, $wrapperFile, ` (Format-FieldList -Items $e.WrapperFields -Max 6), ` (Format-FieldList -Items $e.NormalizerSchemaFields -Max 6), ` (Format-FieldList -Items $candidates -Max 6), ` $status [void]$sb.AppendLine($row) $sidecar.Add([pscustomobject]@{ tool = $e.Tool displayName = $e.DisplayName provider = $e.Provider scope = $e.Scope enabled = $e.Enabled wrapperFile = $e.WrapperFile normalizerFile = $e.NormalizerFile wrapperFieldsPreserved = @($e.WrapperFields) normalizerFieldsPreserved = @($e.NormalizerSchemaFields) suspectedDroppedToolEmittedFields = @($candidates) schemaFieldsNotEmittedByNormalizer = @($e.SchemaFieldsMissing) auditStatus = $status }) } # --- Aggregate candidate fields for #432b --- $globalTally = @{} foreach ($s in $sidecar) { if (-not $s.enabled) { continue } foreach ($f in $s.suspectedDroppedToolEmittedFields) { if (-not $globalTally.ContainsKey($f)) { $globalTally[$f] = 0 } $globalTally[$f]++ } } [void]$sb.AppendLine() [void]$sb.AppendLine('## Candidate FindingRow additions for #432b') [void]$sb.AppendLine() [void]$sb.AppendLine('Fields below are emitted by one or more tool wrappers but have **no home in the current `FindingRow`** and are not preserved by their normalizer. Ordered by occurrence count across the enabled tool set. **#432b** will use this list to propose additive schema fields after foundation #435 lands; **#432c** will then drive per-family normalizer adoption.') [void]$sb.AppendLine() [void]$sb.AppendLine('| # | Candidate field | Occurrence (tools) | Notes |') [void]$sb.AppendLine('| --- | --- | ---:| --- |') $noteFor = @{ 'AdoOrg' = 'ADO organisation context — currently leaks into Detail blob.' 'AdoProject' = 'ADO project context — currently leaks into Detail blob.' 'CommitSha' = 'Git commit SHA for SCM-scoped findings (gitleaks, scorecard, zizmor, trivy).' 'CommitUrl' = 'Browser-deep-link to the offending commit; useful for HTML report drilldowns.' 'Repo' = 'Short owner/repo identifier (distinct from `EntityId` canonical form).' 'RepositoryId' = 'GitHub numeric repo id; useful for cross-correlation.' 'RepositoryCanonicalId' = 'Canonical repo entity id when wrapper produces multiple kinds.' 'Currency' = 'ISO 4217 currency for cost-bearing findings (azure-cost, infracost, finops).' 'SecretType' = 'Detector classification for secret-scanner findings (gitleaks, ado-repos-secrets).' 'ResourceType' = 'ARM resource type already present in `ResourceId`, but explicit field eases grouping.' 'ResourceName' = 'Display name distinct from canonical id; HTML report uses it today via Detail parsing.' 'Location' = 'Azure region for the finding subject; useful for residency / quota dashboards.' 'RecommendationId' = 'Stable advisor / WARA / reliability recommendation id; overlaps with `RuleId` but emitted distinctly.' 'Recommendation' = 'Free-form recommendation string from advisor-style tools.' 'LineNumber' = 'Source line for SCA / SAST / IaC findings — drives editor deep-links.' 'Path' = 'Source file path (relative to repo root) for SCA / SAST / IaC findings.' 'QueryIntent' = 'Copilot-triage classified user intent label (when triage is enabled).' 'DisablesDefaultsWithoutCustomRules' = 'PSRule meta-flag indicating baseline-suppressed rule set.' } $rank = 0 foreach ($kv in ($globalTally.GetEnumerator() | Sort-Object @{Expression='Value';Descending=$true}, @{Expression='Key';Descending=$false})) { $rank++ $note = if ($noteFor.ContainsKey($kv.Key)) { $noteFor[$kv.Key] } else { '' } [void]$sb.AppendLine(('| {0} | `{1}` | {2} | {3} |' -f $rank, $kv.Key, $kv.Value, $note)) } [void]$sb.AppendLine() [void]$sb.AppendLine('## Existing FindingRow optional fields with low normalizer adoption') [void]$sb.AppendLine() [void]$sb.AppendLine('Schema v2.2 already defines these fields, but most normalizers do not yet populate them. They are **not** new schema work — they are **adoption gaps** for #432c. Listed by miss-count across the 36 enabled tools.') [void]$sb.AppendLine() [void]$sb.AppendLine('| Schema field | Normalizers not populating |') [void]$sb.AppendLine('| --- | ---:|') $missTally = @{} foreach ($s in $sidecar) { if (-not $s.enabled -or -not $s.normalizerFile) { continue } foreach ($f in $s.schemaFieldsNotEmittedByNormalizer) { if (-not $missTally.ContainsKey($f)) { $missTally[$f] = 0 } $missTally[$f]++ } } $additiveFocus = @('Pillar','Impact','Effort','DeepLinkUrl','RemediationSnippets','EvidenceUris','BaselineTags','ScoreDelta','MitreTactics','MitreTechniques','EntityRefs','ToolVersion','RuleId','Frameworks','Controls') foreach ($kv in ($missTally.GetEnumerator() | Where-Object { $_.Key -in $additiveFocus } | Sort-Object @{Expression='Value';Descending=$true}, @{Expression='Key';Descending=$false})) { [void]$sb.AppendLine(('| `{0}` | {1} |' -f $kv.Key, $kv.Value)) } [void]$sb.AppendLine() [void]$sb.AppendLine('## Audit status legend') [void]$sb.AppendLine() [void]$sb.AppendLine('- **complete (static)** — wrapper + normalizer both inspected; field deltas computed from source.') [void]$sb.AppendLine('- **pending-real-tenant-run** — confirmation requires running the tool against a live Azure / M365 / GitHub / ADO tenant. Most rows carry this flag because per-tenant payloads frequently expose optional properties not visible to static analysis.') [void]$sb.AppendLine('- **disabled (skipped)** — tool is `enabled: false` in the manifest (e.g. `copilot-triage`).') [void]$sb.AppendLine() [void]$sb.AppendLine('## How to regenerate this audit') [void]$sb.AppendLine() [void]$sb.AppendLine('```powershell') [void]$sb.AppendLine('# 1. Static field extraction → audit-raw.json') [void]$sb.AppendLine('pwsh -NoProfile -File scripts/audit-tool-fields.ps1') [void]$sb.AppendLine('# 2. Render markdown + sidecar JSON') [void]$sb.AppendLine('pwsh -NoProfile -File scripts/render-tool-output-audit.ps1') [void]$sb.AppendLine('```') [void]$sb.AppendLine() [void]$sb.AppendLine('Both scripts read `tools/tool-manifest.json` (the single source of truth — see `.github/copilot-instructions.md`). Adding or removing a tool there will propagate into the next audit regeneration without further edits.') $mdPath = Join-Path $RepoRoot 'docs\tool-output-audit.md' $jsonPath = Join-Path $RepoRoot 'docs\tool-output-audit.json' Set-Content -Path $mdPath -Value $sb.ToString() -Encoding UTF8 $payload = [pscustomobject]@{ schemaVersion = '2.0' generatedForIssue = '#432a' epic = '#427' methodology = 'static-first, delta-only; pending-real-tenant-run flagged honestly' schemaReference = 'modules/shared/Schema.ps1 (FindingRow v2.2)' manifestSource = 'tools/tool-manifest.json' totalTools = $sidecar.Count enabledTools = ($sidecar | Where-Object enabled).Count candidateFindingRowAdditions = ( $globalTally.GetEnumerator() | Sort-Object @{Expression='Value';Descending=$true}, @{Expression='Key';Descending=$false} | ForEach-Object { [pscustomobject]@{ field = $_.Key; occurrenceCount = $_.Value } } ) schemaFieldsAdoptionGap = ( $missTally.GetEnumerator() | Where-Object { $_.Key -in $additiveFocus } | Sort-Object @{Expression='Value';Descending=$true}, @{Expression='Key';Descending=$false} | ForEach-Object { [pscustomobject]@{ field = $_.Key; normalizersNotPopulating = $_.Value } } ) entries = $sidecar } $payload | ConvertTo-Json -Depth 8 | Set-Content -Path $jsonPath -Encoding UTF8 Write-Host "Wrote $mdPath" Write-Host "Wrote $jsonPath" |