# Public/Invoke-PIIAudit.ps1

# Copyright (c) 2026 Jeffrey Snover. All rights reserved.
# Licensed under the MIT License. See LICENSE file in the project root.

function Invoke-PIIAudit {
    <#
    .SYNOPSIS
        Pre-public PII scanner for the AI Triad research repository.
    .DESCRIPTION
        Scans every file under the repository root ($script:RepoRoot), including
        hidden files and dot-files, for text patterns that suggest PII leakage
        from the private rolodex repo. Files located inside a directory named
        .git, _inbox, node_modules, __pycache__ or .venv (at any depth) are skipped.

        Checks for:
            - Email address patterns (user@domain.tld)
            - Phone number patterns

        NOTE: checks for rolodex-only keys (e.g. "email", "notes") and for file
        paths referencing the private rolodex repo are NOT implemented by this
        function; only the two regex checks above are performed.

        A file that cannot be read is recorded as a finding of type UNREADABLE,
        so an unreadable file fails the audit rather than being silently ignored.

        Run this before flipping the repo to public.
    .OUTPUTS
        System.String
        A one-line summary: 'AUDIT PASSED: ...' or 'AUDIT FAILED: ...'.
        When there are findings, the summary is written and then a terminating
        error is thrown.
    .EXAMPLE
        Invoke-PIIAudit
        # Writes 'AUDIT PASSED: No PII patterns found.' or, when findings exist,
        # writes the 'AUDIT FAILED' summary and throws.
    .EXAMPLE
        Invoke-PIIAudit -Verbose
        # Additionally prints each finding to the verbose stream before throwing.
    #>

    [CmdletBinding()]
    param()

    Set-StrictMode -Version Latest
    $ErrorActionPreference = 'Stop'

    # Resolve to an absolute provider path so the relative-path arithmetic below
    # is correct even if $script:RepoRoot is relative or has a trailing separator.
    $RepoRoot = (Resolve-Path -LiteralPath $script:RepoRoot).ProviderPath
    $Separators = [char[]]@(
        [System.IO.Path]::DirectorySeparatorChar,
        [System.IO.Path]::AltDirectorySeparatorChar
    )

    $EmailPattern = [regex]::new('[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,}')
    $PhonePattern = [regex]::new('\b(\+?1[\s.\-]?)?(\(?\d{3}\)?[\s.\-]?)?\d{3}[\s.\-]?\d{4}\b')

    # Built once: the pattern table does not depend on the file being scanned.
    $Patterns = @(
        @{ Pattern = $EmailPattern; Label = 'EMAIL' }
        @{ Pattern = $PhonePattern; Label = 'PHONE' }
    )

    $SkipDirectories = @('.git', '_inbox', 'node_modules', '__pycache__', '.venv')

    $Findings = [System.Collections.Generic.List[PSCustomObject]]::new()

    # -Force includes hidden files (and dot-files on Linux/macOS), which would
    # otherwise be left out of the audit. -LiteralPath prevents '[' / ']' in the
    # root path from being interpreted as wildcards.
    $AllFiles = Get-ChildItem -LiteralPath $RepoRoot -Recurse -File -Force -ErrorAction SilentlyContinue

    foreach ($File in $AllFiles) {
        # Strip the root prefix, then any separator left at the front.
        $RelativePath = $File.FullName.Substring($RepoRoot.Length).TrimStart($Separators)

        # Only directory components decide skipping; the file's own name must not,
        # otherwise a file literally named e.g. '_inbox' would escape the audit.
        $DirectoryParts = [System.IO.Path]::GetDirectoryName($RelativePath) -split '[/\\]'

        $ShouldSkip = $false
        foreach ($Part in $DirectoryParts) {
            if ($Part -in $SkipDirectories) {
                $ShouldSkip = $true
                break
            }
        }
        if ($ShouldSkip) { continue }

        try {
            # -LiteralPath: with -Path, a file name containing '[' or ']' is treated
            # as a wildcard pattern and the file may never actually be read.
            $Content = Get-Content -LiteralPath $File.FullName -Raw -Encoding utf8 -ErrorAction Stop
        }
        catch {
            $Findings.Add([PSCustomObject]@{
                File  = $RelativePath
                Type  = 'UNREADABLE'
                Match = "Could not read file: $($_.Exception.Message)"
            })
            continue
        }

        if ([string]::IsNullOrEmpty($Content)) { continue }

        foreach ($Entry in $Patterns) {
            # Not named $Matches: that is a PowerShell automatic variable.
            $PatternMatches = $Entry.Pattern.Matches($Content)
            foreach ($Match in $PatternMatches) {
                $Findings.Add([PSCustomObject]@{
                    File  = $RelativePath
                    Type  = $Entry.Label
                    Match = $Match.Value
                })
            }
        }
    }

    if ($Findings.Count -gt 0) {
        Write-Output "AUDIT FAILED: $($Findings.Count) potential PII finding(s)."
        # Guard avoids formatting every finding when verbose output is off.
        if ($VerbosePreference -eq 'Continue') {
            foreach ($Finding in $Findings) {
                Write-Verbose " [$($Finding.Type)] $($Finding.File): $($Finding.Match)"
            }
        }
        throw "PII audit found $($Findings.Count) finding(s). Review and remediate before making the repo public."
    }
    else {
        Write-Output 'AUDIT PASSED: No PII patterns found.'
    }
}