scripts/Backfill-ChangelogCitations.ps1
|
<#
.SYNOPSIS
    Backfill CHANGELOG.md with missing PR citations from git history.

.DESCRIPTION
    Walks git log on the current branch, extracts commit subjects that reference
    PRs via (#NNN) patterns, determines which CHANGELOG version section each
    commit belongs to (by tag date), and appends a citation bullet for any PR
    that is not already mentioned somewhere in the CHANGELOG.

    The script is idempotent: re-running it produces no changes if every PR is
    already cited.

    git is invoked in the current working directory, so run the script from
    inside the repository.

.PARAMETER ChangelogPath
    Path to the CHANGELOG.md file. Defaults to CHANGELOG.md in the repo root.

.PARAMETER RepoUrl
    GitHub repository URL for link generation.
    Defaults to https://github.com/martinopedal/azure-analyzer.

.PARAMETER WhatIf
    When set, prints a summary of the pending changes without writing to disk.

.EXAMPLE
    .\scripts\Backfill-ChangelogCitations.ps1

.EXAMPLE
    .\scripts\Backfill-ChangelogCitations.ps1 -WhatIf
#>
[CmdletBinding(SupportsShouldProcess = $true)]
param(
    [string]$ChangelogPath = (Join-Path $PSScriptRoot '..\CHANGELOG.md'),
    [string]$RepoUrl = 'https://github.com/martinopedal/azure-analyzer'
)

Set-StrictMode -Version Latest
$ErrorActionPreference = 'Stop'

# ── helpers ──────────────────────────────────────────────────────────────

function Get-TagDates {
    <#
    Returns @{ 'v1.0.0' = [datetime]; ... } for every tag under refs/tags/
    (annotated or lightweight), keyed by short tag name.
    Throws if git exits with a non-zero status.
    #>
    $tags = @{}

    # stderr is deliberately NOT merged into the output: merged error text
    # would be parsed as data, and a failure would go unnoticed.
    $raw = git --no-pager for-each-ref --sort=creatordate `
        --format='%(refname:short)|%(creatordate:iso8601)' refs/tags/
    if ($LASTEXITCODE -ne 0) {
        throw "git for-each-ref failed with exit code $LASTEXITCODE."
    }

    foreach ($line in $raw) {
        if ($line -match '^([^\|]+)\|(.+)$') {
            $tags[$Matches[1]] = [datetime]::Parse($Matches[2], [cultureinfo]::InvariantCulture)
        }
    }
    return $tags
}

function Get-CommitsWithPRs {
    <#
    Returns an array of objects: @{ SHA; Subject; PRNumbers; AuthorDate }
    Only includes commits whose subject contains (#NNN).
    Throws if git exits with a non-zero status.
    #>
    $commits = [System.Collections.Generic.List[object]]::new()

    # Use HEAD (current branch) not --all, to avoid stash/detached refs
    $raw = git --no-pager log --format='%H|%aI|%s' HEAD
    if ($LASTEXITCODE -ne 0) {
        throw "git log failed with exit code $LASTEXITCODE."
    }

    foreach ($line in $raw) {
        if ($line -notmatch '^([0-9a-f]{40})\|([^\|]+)\|(.+)$') { continue }

        # Copy the captures before the next -match overwrites $Matches.
        $sha     = $Matches[1]
        $dateStr = $Matches[2]
        $subject = $Matches[3]

        # Skip git stash entries
        if ($subject -match '^(WIP on |index on |On \w+:)') { continue }

        $prNums = [System.Collections.Generic.List[int]]::new()
        foreach ($match in [regex]::Matches($subject, '\(#(\d+)\)')) {
            $prNums.Add([int]$match.Groups[1].Value)
        }
        if ($prNums.Count -eq 0) { continue }

        $commits.Add([PSCustomObject]@{
            SHA        = $sha
            Subject    = $subject
            PRNumbers  = $prNums.ToArray()
            AuthorDate = [datetime]::Parse($dateStr, [cultureinfo]::InvariantCulture)
        })
    }
    return $commits.ToArray()
}

function Get-VersionForCommit {
    <#
    Given tag boundaries sorted in chronological (ascending) order, returns the
    version section name a commit belongs to: the EARLIEST tag whose date is on
    or after the commit date, with a leading 'v' stripped (e.g. '1.1.0').
    Returns 'Unreleased' when there are no tags or the commit is newer than
    every tag.
    #>
    param(
        [datetime]$CommitDate,
        [array]$SortedBoundaries  # @( @{Tag='v1.0.0'; Date=...}, ... )
    )

    if (-not $SortedBoundaries -or $SortedBoundaries.Count -eq 0) {
        return 'Unreleased'
    }

    # Scan oldest -> newest. Scanning newest -> oldest would attribute every
    # commit older than the latest tag to the latest release.
    foreach ($boundary in $SortedBoundaries) {
        if ($CommitDate -le $boundary.Date) {
            return ($boundary.Tag -replace '^v', '')
        }
    }
    return 'Unreleased'
}

function Get-CitedPRsPerSection {
    <#
    Parses CHANGELOG.md lines and returns a hashtable of HashSet[int]:
    @{ '1.1.1' = {831, 778, ...}; 'Unreleased' = {...}; ... }
    Sections are opened by '## [x.y.z]' or '## [Unreleased]' / '## Unreleased'
    headers; lines before the first such header are ignored.
    #>
    param([string[]]$Lines)

    $sections = @{}
    $currentSection = $null

    foreach ($line in $Lines) {
        $header = $null
        if ($line -match '^\#\#\s+\[(\d+\.\d+\.\d+)\]') {
            $header = $Matches[1]
        }
        elseif ($line -match '^\#\#\s+\[?Unreleased\]?') {
            $header = 'Unreleased'
        }

        if ($null -ne $header) {
            $currentSection = $header
            if (-not $sections.ContainsKey($currentSection)) {
                $sections[$currentSection] = [System.Collections.Generic.HashSet[int]]::new()
            }
        }

        if ($null -eq $currentSection) { continue }

        # One pattern covers [#NNN], (#NNN) and plain "closes #NNN" references.
        # \d+ is greedy, so the full number is always captured.
        foreach ($ref in [regex]::Matches($line, '#(\d+)')) {
            [void]$sections[$currentSection].Add([int]$ref.Groups[1].Value)
        }
    }
    return $sections
}

function Get-ConventionalType {
    <#
    Extracts the conventional-commit type from a subject line (lower-cased).
    Falls back to keyword inference, then to 'chore'.
    #>
    param([string]$Subject)

    # -match is case-insensitive, so 'Fix: ...' is recognised as well.
    if ($Subject -match '^(feat|fix|docs|chore|ci|test|refactor|perf|deps|build|style)\b') {
        return $Matches[1].ToLowerInvariant()
    }

    # Infer from keywords
    if ($Subject -match '\bfix\b|\bbugfix\b|\bhotfix\b') { return 'fix' }
    if ($Subject -match '\btest\b|\bpester\b|\be2e\b') { return 'test' }
    if ($Subject -match '\bdocs?\b|\bREADME\b|\bCHANGELOG\b') { return 'docs' }
    if ($Subject -match '\bci\b|\bworkflow\b') { return 'ci' }
    return 'chore'
}

function Get-SectionHeading {
    <#
    Maps conventional-commit type to release-please section heading.
    Unknown types (including 'build' and 'style') map to 'Chores'.
    #>
    param([string]$Type)

    switch ($Type) {
        'feat'     { return 'Features' }
        'fix'      { return 'Fixes' }
        'docs'     { return 'Documentation' }
        'chore'    { return 'Chores' }
        'ci'       { return 'CI' }
        'test'     { return 'Tests' }
        'refactor' { return 'Refactors' }
        'perf'     { return 'Performance' }
        'deps'     { return 'Dependencies' }
        default    { return 'Chores' }
    }
}

function Format-CitationBullet {
    <#
    Formats a single CHANGELOG bullet in release-please style:
    "* <description> ([#N](url/issues/N) ...) ([abcdef0](url/commit/<sha>))"
    #>
    param(
        [string]$Subject,
        [int[]]$PRNumbers,
        [string]$SHA,
        [string]$RepoUrl
    )

    # Strip conventional prefix, optional scope and optional breaking-change
    # marker ("feat(scope)!: ") for cleaner display
    $desc = $Subject -replace '^\w+(\([^)]*\))?!?:\s*', ''
    # Strip PR refs from description since we add them explicitly
    $desc = $desc -replace '\s*\(#\d+\)\s*', ' '
    $desc = $desc.Trim()

    $prLinks  = ($PRNumbers | ForEach-Object { "[#$_]($RepoUrl/issues/$_)" }) -join ' '
    $shortSha = $SHA.Substring(0, 7)
    $shaLink  = "[$shortSha]($RepoUrl/commit/$SHA)"

    return "* $desc ($prLinks) ($shaLink)"
}

# ── main ─────────────────────────────────────────────────────────────────

$resolvedPath = (Resolve-Path $ChangelogPath -ErrorAction Stop).Path

# Read once raw (to learn the newline style and whether the file ends with a
# newline) and once line-wise. @() keeps a 0/1-line file an array.
[string]$rawText = Get-Content -LiteralPath $resolvedPath -Raw -Encoding UTF8
$eol = if ($rawText.Contains("`r`n")) { "`r`n" } else { "`n" }
$endsWithNewline = $rawText.EndsWith("`n")
$originalLines = @(Get-Content -LiteralPath $resolvedPath -Encoding UTF8)

Write-Verbose "Parsing existing CHANGELOG citations..."
$citedPerSection = Get-CitedPRsPerSection -Lines $originalLines

# A PR cited in ANY section counts as cited (global dedup); flatten once
# instead of scanning every section for every PR.
$citedAnywhere = [System.Collections.Generic.HashSet[int]]::new()
foreach ($sectionSet in $citedPerSection.Values) {
    foreach ($number in $sectionSet) { [void]$citedAnywhere.Add($number) }
}

Write-Verbose "Collecting tag boundaries..."
$tagDates = Get-TagDates
$sortedBoundaries = @($tagDates.GetEnumerator() | Sort-Object Value | ForEach-Object {
    [PSCustomObject]@{ Tag = $_.Key; Date = $_.Value }
})

Write-Verbose "Walking git log for PR references..."
$commits = @(Get-CommitsWithPRs)

# Build list of insertions grouped by (version, section heading).
# Ordered so the result does not depend on hashtable enumeration order.
$insertions = [ordered]@{}   # key = "version|heading"
$allMissingPRs = [System.Collections.Generic.HashSet[int]]::new()

foreach ($commit in $commits) {
    # Keep PRs that are neither cited already nor planned by an earlier commit
    $uncitedPRs = @($commit.PRNumbers |
        Where-Object { -not $citedAnywhere.Contains($_) -and -not $allMissingPRs.Contains($_) } |
        Select-Object -Unique)
    if ($uncitedPRs.Count -eq 0) { continue }

    $version = Get-VersionForCommit -CommitDate $commit.AuthorDate -SortedBoundaries $sortedBoundaries
    $ccType  = Get-ConventionalType -Subject $commit.Subject
    $heading = Get-SectionHeading -Type $ccType
    $key     = "$version|$heading"

    if (-not $insertions.Contains($key)) {
        $insertions[$key] = [PSCustomObject]@{
            Version = $version
            Heading = $heading
            Bullets = [System.Collections.Generic.List[string]]::new()
            PRs     = [System.Collections.Generic.List[int]]::new()
        }
    }

    $bullet = Format-CitationBullet -Subject $commit.Subject `
        -PRNumbers $uncitedPRs -SHA $commit.SHA -RepoUrl $RepoUrl
    $insertions[$key].Bullets.Add($bullet)

    foreach ($pr in $uncitedPRs) {
        $insertions[$key].PRs.Add($pr)
        [void]$allMissingPRs.Add($pr)
    }
}

if ($allMissingPRs.Count -eq 0) {
    Write-Host "✅ No missing PR citations found — CHANGELOG is up to date."
    return
}

Write-Host "Found $($allMissingPRs.Count) uncited PR(s) across $($insertions.Count) section(s)."

# ── Apply insertions to the CHANGELOG lines ──────────────────────────────

$newLines = [System.Collections.Generic.List[string]]::new([string[]]$originalLines)

# All insertion indices are computed against the unmodified line list and
# applied afterwards from the bottom up, so they never shift each other.
$insertionPoints = [System.Collections.Generic.List[object]]::new()
$touchedSections = [System.Collections.Generic.List[string]]::new()
$anySectionPattern = '^\#\#\s+(\[|Unreleased|\d+\.\d+\.\d+)'
$sequence = 0

foreach ($entry in $insertions.GetEnumerator()) {
    $group   = $entry.Value
    $version = $group.Version
    $heading = $group.Heading
    $bullets = $group.Bullets

    $versionPattern = if ($version -eq 'Unreleased') {
        '^\#\#\s+\[?Unreleased\]?'
    }
    else {
        "^\#\#\s+\[$([regex]::Escape($version))\]"
    }

    # Locate the FIRST matching version header and the header that follows it
    $versionLineIdx = -1
    $nextVersionLineIdx = $newLines.Count
    for ($i = 0; $i -lt $newLines.Count; $i++) {
        if ($versionLineIdx -eq -1) {
            if ($newLines[$i] -match $versionPattern) { $versionLineIdx = $i }
        }
        elseif ($newLines[$i] -match $anySectionPattern) {
            $nextVersionLineIdx = $i
            break
        }
    }

    if ($versionLineIdx -eq -1) {
        Write-Warning "Could not find section header for version '$version' — skipping $($bullets.Count) bullet(s)."
        # These PRs are not going to be written; keep the totals honest.
        foreach ($pr in $group.PRs) { [void]$allMissingPRs.Remove($pr) }
        continue
    }

    # Find the ### $heading within this version section
    $headingLineIdx = -1
    for ($i = $versionLineIdx + 1; $i -lt $nextVersionLineIdx; $i++) {
        if ($newLines[$i] -match "^\#\#\#\s+$([regex]::Escape($heading))\s*$") {
            $headingLineIdx = $i
            break
        }
    }

    if ($headingLineIdx -eq -1) {
        # Create the heading after the last non-blank line of the section
        # (directly after the version header when the section is empty).
        $insertAt = $versionLineIdx + 1
        for ($i = $nextVersionLineIdx - 1; $i -gt $versionLineIdx; $i--) {
            if ($newLines[$i] -match '\S') {
                $insertAt = $i + 1
                break
            }
        }

        $block = [System.Collections.Generic.List[string]]::new()
        $block.Add('')
        $block.Add("### $heading")
        $block.Add('')
        $block.AddRange($bullets)
        # Add a separating blank line only when the following line is not
        # already blank (and we are not at end of file).
        if ($insertAt -lt $newLines.Count -and $newLines[$insertAt] -match '\S') {
            $block.Add('')
        }

        $insertionPoints.Add([PSCustomObject]@{
            Index = $insertAt
            Rank  = 1          # new heading blocks go below appended bullets
            Seq   = $sequence
            Lines = $block.ToArray()
        })
    }
    else {
        # Insert directly after the last bullet (or bullet continuation line)
        # under the heading, so the blank line before the next heading stays.
        $insertAt = $headingLineIdx + 1
        if ($insertAt -lt $nextVersionLineIdx -and $newLines[$insertAt] -notmatch '\S') {
            $insertAt++        # keep the blank line that follows the heading
        }
        for ($i = $headingLineIdx + 1; $i -lt $nextVersionLineIdx; $i++) {
            $current = $newLines[$i]
            if ($current -match '^\#\#\#') { break }
            if ($current -match '^[*-]\s' -or $current -match '^\s+\S') {
                $insertAt = $i + 1
            }
        }

        $insertionPoints.Add([PSCustomObject]@{
            Index = $insertAt
            Rank  = 0
            Seq   = $sequence
            Lines = $bullets.ToArray()
        })
    }

    $touchedSections.Add([string]$entry.Key)
    $sequence++
}

# Apply bottom-up. For equal indices the block inserted last ends up on top,
# so sorting Rank/Seq descending yields: appended bullets first, then new
# heading blocks, each in discovery order.
$orderedPoints = @($insertionPoints | Sort-Object -Property Index, Rank, Seq -Descending)
foreach ($ip in $orderedPoints) {
    $newLines.InsertRange($ip.Index, [string[]]$ip.Lines)
}

# Preserve the file's newline style and its trailing newline (if any)
$newContent = $newLines -join $eol
if ($endsWithNewline) { $newContent += $eol }

# ── Output / Write ──────────────────────────────────────────────────────

if ($allMissingPRs.Count -eq 0) {
    Write-Warning "No citation could be placed — CHANGELOG left unchanged."
}
elseif ($WhatIfPreference) {
    Write-Host "`n─── DRY RUN: would add $($allMissingPRs.Count) citation(s) ───"
    # Show a compact diff summary
    $added = ($newLines.Count - $originalLines.Count)
    Write-Host "Lines added: $added"
    Write-Host "Sections touched: $($touchedSections.Count)"
    Write-Host "PR numbers: $($allMissingPRs | Sort-Object | ForEach-Object { "#$_" })"
}
else {
    if ($PSCmdlet.ShouldProcess($resolvedPath, "Write $($allMissingPRs.Count) backfilled citations")) {
        Set-Content -LiteralPath $resolvedPath -Value $newContent -Encoding UTF8 -NoNewline
        Write-Host "✅ Wrote $($allMissingPRs.Count) citation(s) to $resolvedPath"
    }
}

return [PSCustomObject]@{
    CitationsAdded  = $allMissingPRs.Count
    PRNumbers       = ($allMissingPRs | Sort-Object)
    SectionsTouched = $touchedSections.ToArray()
}
|