Public/Update-Snapshot.ps1
|
# Copyright (c) 2026 Jeffrey Snover. All rights reserved. # Licensed under the MIT License. See LICENSE file in the project root. function Update-Snapshot { <# .SYNOPSIS Re-generates snapshot.md for all existing sources using updated conversion logic. .DESCRIPTION Loops through all sources/ directories that contain a raw/ subdirectory and re-runs the appropriate document conversion from DocConverters.psm1. Preserves the existing snapshot header and overwrites snapshot.md with the newly converted content. .PARAMETER DryRun Show what would be processed without writing any files. .EXAMPLE Update-Snapshot .EXAMPLE Update-Snapshot -DryRun .EXAMPLE Redo-Snapshots # backward-compat alias #> [CmdletBinding()] param( [switch]$DryRun ) # ───────────────────────────────────────────────────────────────────────── # Paths (use module-scoped $script:RepoRoot set by AITriad.psm1) # ───────────────────────────────────────────────────────────────────────── $RepoRoot = $script:RepoRoot $SourcesDir = Get-SourcesDir # ───────────────────────────────────────────────────────────────────────── # Extract the provenance header from an existing snapshot.md # Uses line-by-line scanning (no HTML-comment regex) to stay AMSI-safe. # Header = everything up to and including the first line that is exactly "---" # ───────────────────────────────────────────────────────────────────────── function Get-SnapshotHeader { param([string]$SnapshotPath) if (-not (Test-Path $SnapshotPath)) { return '' } $Lines = Get-Content $SnapshotPath -Encoding UTF8 $HeaderLines = [System.Collections.Generic.List[string]]::new() $FoundSep = $false foreach ($Line in $Lines) { $HeaderLines.Add($Line) # The provenance block ends with a standalone "---" separator if ($Line.Trim() -eq '---') { $FoundSep = $true break } } if ($FoundSep) { return ($HeaderLines -join "`n") + "`n" } return '' } # ───────────────────────────────────────────────────────────────────────── # Re-convert a single raw file via DocConverters (keeps AMSI-sensitive # calls inside the module rather than this function). # ───────────────────────────────────────────────────────────────────────── function Invoke-ReConvert { param( [string]$FilePath, [string]$Extension ) switch ($Extension) { '.pdf' { return ConvertFrom-Pdf -PdfPath $FilePath } { $_ -in '.htm', '.html' } { $Raw = Get-Content $FilePath -Raw -Encoding UTF8 return ConvertFrom-Html -Html $Raw } { $_ -in '.txt', '.md' } { return Get-Content $FilePath -Raw -Encoding UTF8 } } return $null } # ───────────────────────────────────────────────────────────────────────── # Main loop # ───────────────────────────────────────────────────────────────────────── $DocDirs = Get-ChildItem -Path $SourcesDir -Directory | Where-Object { Test-Path (Join-Path $_.FullName 'raw') } $PdfCount = 0 $HtmlCount = 0 $TxtCount = 0 $SkipCount = 0 $ErrorCount = 0 Write-Host "Update-Snapshot: scanning $($DocDirs.Count) source directories..." -ForegroundColor Cyan foreach ($DocDir in $DocDirs) { $DocId = $DocDir.Name $RawDir = Join-Path $DocDir.FullName 'raw' $RawFiles = @(Get-ChildItem -Path $RawDir -File -ErrorAction SilentlyContinue) if ($RawFiles.Count -eq 0) { Write-Host " [$DocId] No raw files — skipping" -ForegroundColor Yellow $SkipCount++ continue } # Pick the primary raw file: PDF first, then HTML, then text $RawFile = $RawFiles | Where-Object { $_.Extension -eq '.pdf' } | Select-Object -First 1 if (-not $RawFile) { $RawFile = $RawFiles | Where-Object { $_.Extension -in '.html', '.htm' } | Select-Object -First 1 } if (-not $RawFile) { $RawFile = $RawFiles | Where-Object { $_.Extension -in '.txt', '.md' } | Select-Object -First 1 } if (-not $RawFile) { Write-Host " [$DocId] No supported file type — skipping" -ForegroundColor Yellow $SkipCount++ continue } $Ext = $RawFile.Extension.ToLower() if ($DryRun) { Write-Host " [$DocId] Would re-convert: $($RawFile.Name) ($Ext)" -ForegroundColor Gray continue } Write-Host "`n[$DocId] Re-converting: $($RawFile.Name)" -ForegroundColor Cyan try { # Track counts switch ($Ext) { '.pdf' { $PdfCount++ } { $_ -in '.html', '.htm' } { $HtmlCount++ } { $_ -in '.txt', '.md' } { $TxtCount++ } } $NewMarkdown = Invoke-ReConvert -FilePath $RawFile.FullName -Extension $Ext # Preserve existing provenance header if present $SnapshotPath = Join-Path $DocDir.FullName 'snapshot.md' $Header = Get-SnapshotHeader -SnapshotPath $SnapshotPath $FinalContent = if ($Header) { $Header + "`n" + $NewMarkdown } else { $NewMarkdown } Set-Content -Path $SnapshotPath -Value $FinalContent -Encoding UTF8 -ErrorAction Stop Write-Host " snapshot.md updated ($([int]$FinalContent.Length) chars)" -ForegroundColor Green } catch { $ErrorCount++ Write-Host " [$DocId] ERROR: $_" -ForegroundColor Yellow } } # ───────────────────────────────────────────────────────────────────────── # Summary # ───────────────────────────────────────────────────────────────────────── Write-Host '' Write-Host ' ════════════════════════════════════════════════' -ForegroundColor Cyan Write-Host ' Update-Snapshot complete' -ForegroundColor Green Write-Host ' ════════════════════════════════════════════════' -ForegroundColor Cyan Write-Host " PDF conversions : $PdfCount" -ForegroundColor White Write-Host " HTML conversions : $HtmlCount" -ForegroundColor White Write-Host " Text pass-through: $TxtCount" -ForegroundColor White Write-Host " Skipped : $SkipCount" -ForegroundColor Gray Write-Host " Errors : $ErrorCount" -ForegroundColor $(if ($ErrorCount -gt 0) { 'Red' } else { 'Gray' }) Write-Host '' } |