# Public/Test-TfvcMigration.ps1

function Test-TfvcMigration {
    <#
    .SYNOPSIS
        Multi-pass, branch-aware verification of a TFVC-to-GitHub migration.
    .DESCRIPTION
        For each target branch, compares the migrated branch against its TFVC source:
          Pass 1 - File inventory comparison (per branch)
          Pass 2 - Content hash comparison (SHA-256, per branch)
          Pass 3 - Changeset-to-commit coverage (across all branches)
          Pass 4 - Remote push verification (only when remote 'origin' is configured):
                   each local branch tip is compared with origin/<branch>.
        Writes detailed results to $outputDir/verification/ and a summary JSON.

        NOTE: this function modifies the local repository. It checks out each
        branch in turn, removes files that exist in Git but not at the TFVC tip
        (committing the removal with a 'Tfvc2Git-Generated' message), and finally
        checks out the primary branch.
    .PARAMETER ConfigPath
        Path to config.json, or to a directory containing it. Defaults to ./config.json.
    .PARAMETER Push
        When set, pushes all branches and tags to 'origin' before the remote
        push verification pass.
    .EXAMPLE
        Test-TfvcMigration -ConfigPath ./config.json
    .EXAMPLE
        Test-TfvcMigration -ConfigPath ./config.json -Push
    #>

    [CmdletBinding()]
    param(
        [string]$ConfigPath = "./config.json",
        [switch]$Push
    )

    $ErrorActionPreference = 'Stop'
    $logFile = $null

    # Quotes a value for CSV output only when it contains a comma, a double
    # quote or a line break, so rows without special characters are unchanged.
    $toCsvField = {
        param([string]$Value)
        if ($Value -match '[",\r\n]') { '"' + ($Value -replace '"', '""') + '"' } else { $Value }
    }

    try {
        # -- Bootstrap ----------------------------------------------------
        if (-not $ConfigPath) { $ConfigPath = "./config.json" }
        if (Test-Path -LiteralPath $ConfigPath -PathType Container) { $ConfigPath = Join-Path $ConfigPath 'config.json' }
        # -LiteralPath: the config path must not be treated as a wildcard pattern.
        $config    = Get-Content -LiteralPath $ConfigPath -Raw | ConvertFrom-Json
        $outputDir = $ExecutionContext.SessionState.Path.GetUnresolvedProviderPathFromPSPath($config.outputDir)
        $repoPath  = Join-Path $outputDir 'git-repo'
        $verifyDir = Join-Path $outputDir 'verification'
        $tempDir   = Join-Path $verifyDir 'temp'
        $logFile   = Join-Path $verifyDir 'verify.log'

        foreach ($d in @($verifyDir, $tempDir)) {
            if (-not (Test-Path $d)) { New-Item -Path $d -ItemType Directory -Force | Out-Null }
        }

        $conn = New-TfvcConnection `
            -ServerUrl  $config.adoServerUrl `
            -Collection $config.collection `
            -Project    $config.project `
            -Pat        $config.pat `
            -ApiVersion $(if ($config.apiVersion) { $config.apiVersion } else { '7.0' })

        $branches      = Get-ConfigBranches -SourceMappings $config.sourceMappings
        $primaryBranch = Get-PrimaryBranch  -SourceMappings $config.sourceMappings
        $downloadConcurrency = $(if ($null -ne $config.psobject.Properties['downloadConcurrency'] -and $config.downloadConcurrency) { [int]$config.downloadConcurrency } else { 8 })

        Write-MigrationLog "=== Verification started ===" -LogFile $logFile
        Write-MigrationLog "Branches to verify: $($branches -join ', ')" -LogFile $logFile

        # -- Pass 1 + 2 - per-branch inventory and hash -------------------
        $allOnlyInTfvc = [System.Collections.Generic.List[string]]::new()
        $allOnlyInGit  = [System.Collections.Generic.List[string]]::new()
        $cleanedOrphans = [System.Collections.Generic.List[string]]::new()
        $perBranch     = [System.Collections.Generic.List[object]]::new()
        $existingBranches = [System.Collections.Generic.List[string]]::new()
        $totalTfvc = 0; $totalGit = 0; $totalInBoth = 0

        $hashRows   = [System.Collections.Generic.List[string]]::new()
        $hashRows.Add("Branch,Path,TfvcSHA256,GitSHA256,Match")
        $mismatches = [System.Collections.Generic.List[object]]::new()
        $cleanedSecrets = [System.Collections.Generic.List[string]]::new()
        $matched    = 0
        $compared   = 0

        # Files recorded in redacted-secrets.json are expected to differ from
        # TFVC; they are keyed as "<branch>:<serverPath>".
        $redactedFilesSet = [System.Collections.Generic.HashSet[string]]::new([StringComparer]::OrdinalIgnoreCase)
        $redactedSecretsFile = Join-Path $outputDir 'redacted-secrets.json'
        if (Test-Path $redactedSecretsFile) {
            $redactedData = Get-Content $redactedSecretsFile -Raw | ConvertFrom-Json
            if ($redactedData) {
                foreach ($r in @($redactedData)) {
                    [void]$redactedFilesSet.Add("$($r.Branch):$($r.ServerPath)")
                }
            }
        }

        foreach ($b in $branches) {
            Invoke-Git -C $repoPath rev-parse --verify -q "refs/heads/$b" > $null 2>&1
            $branchExists = ($LASTEXITCODE -eq 0)
            if ($branchExists) {
                [void]$existingBranches.Add($b)
                # A failed checkout would leave another branch's working tree in
                # place; the inventory, orphan cleanup and hashing below would
                # then operate on the wrong branch, so fail loudly instead.
                $checkoutOutput = Invoke-Git -C $repoPath checkout $b 2>&1
                if ($LASTEXITCODE -ne 0) { throw "git checkout failed on '$b': $checkoutOutput" }
            }

            Write-MigrationLog "Pass 1 [$b]: file inventory" -LogFile $logFile

            # TFVC files for the mappings that target this branch
            $tfvcFiles  = [System.Collections.Generic.HashSet[string]]::new([StringComparer]::OrdinalIgnoreCase)
            $pathLookup = @{}
            foreach ($mapping in $config.sourceMappings) {
                if ((Get-MappingBranch -Mapping $mapping) -ne $b) { continue }
                $items = Get-TfvcItems -Connection $conn -ScopePath $mapping.tfvcPath -RecursionLevel 'Full'
                foreach ($item in $items) {
                    # Skip folder entries; only files are compared.
                    if ($null -ne $item.psobject.Properties['isFolder'] -and $item.isFolder -eq $true) { continue }
                    if ($null -ne $item.psobject.Properties['gitObjectType'] -and $item.gitObjectType -eq 'tree') { continue }
                    $destPath = ConvertTo-RelativePath -ServerPath $item.path -TfvcBase $mapping.tfvcPath -DestinationPrefix $mapping.destinationPath
                    if (-not $destPath) { continue }
                    $normalized = $destPath.Replace('\', '/')
                    [void]$tfvcFiles.Add($normalized)
                    $pathLookup[$normalized] = $item.path
                }
            }

            # Git files on this branch
            $gitFiles = [System.Collections.Generic.HashSet[string]]::new([StringComparer]::OrdinalIgnoreCase)
            if ($branchExists) {
                $gitOutput = Invoke-Git -C $repoPath ls-files 2>&1
                if ($LASTEXITCODE -ne 0) { throw "git ls-files failed on '$b': $gitOutput" }
                foreach ($line in ($gitOutput -split "`n")) {
                    $f = $line.Trim()
                    if ($f) { [void]$gitFiles.Add($f.Replace('\', '/')) }
                }
            }

            $onlyInTfvc = @($tfvcFiles | Where-Object { -not $gitFiles.Contains($_) })
            $onlyInGit  = @($gitFiles  | Where-Object { -not $tfvcFiles.Contains($_) -and $_ -ne '.gitattributes' -and $_ -ne '.gitignore' })

            if ($onlyInGit.Count -gt 0) {
                Write-MigrationLog " [$b] Cleaning up $($onlyInGit.Count) orphaned files..." -LogFile $logFile
                foreach ($f in $onlyInGit) {
                    Invoke-Git -C $repoPath rm --cached $f 2>&1 | Out-Null
                    $fullPath = Join-Path $repoPath $f
                    # -LiteralPath: file names may contain wildcard characters such as [ and ].
                    if (Test-Path -LiteralPath $fullPath) { Remove-Item -LiteralPath $fullPath -Force -ErrorAction SilentlyContinue }
                    $cleanedOrphans.Add("${b}:$f")
                }
                Invoke-Git -C $repoPath commit -m "Tfvc2Git-Generated: Remove orphaned files destroyed in TFVC`n`nThese files were present in historical changesets but no longer exist in the TFVC tip, likely due to a Destroy operation." 2>&1 | Out-Null
                # Cleaned orphans are reported via cleanedOrphans, not as inventory failures.
                $onlyInGit = @()
            }

            $inBoth     = @($tfvcFiles | Where-Object { $gitFiles.Contains($_) })

            foreach ($f in $onlyInTfvc) { [void]$allOnlyInTfvc.Add("${b}:$f") }
            foreach ($f in $onlyInGit)  { [void]$allOnlyInGit.Add("${b}:$f") }
            $totalTfvc += $tfvcFiles.Count; $totalGit += $gitFiles.Count; $totalInBoth += $inBoth.Count

            Write-MigrationLog " [$b] TFVC: $($tfvcFiles.Count) Git: $($gitFiles.Count) Matched: $($inBoth.Count) OnlyTfvc: $($onlyInTfvc.Count) OnlyGit: $($onlyInGit.Count)" -LogFile $logFile

            # Pass 2 - hashes for this branch. Download TFVC content concurrently
            # in batches (so we never materialise the whole tree of temp files at
            # once), then hash each downloaded file against the Git working copy.
            Write-MigrationLog "Pass 2 [$b]: content hashes ($($inBoth.Count) files)" -LogFile $logFile
            $batchSize = [Math]::Max(50, $downloadConcurrency * 25)
            $csvBranch = & $toCsvField $b
            for ($start = 0; $start -lt $inBoth.Count; $start += $batchSize) {
                $end   = [Math]::Min($start + $batchSize, $inBoth.Count) - 1
                $batch = @($inBoth[$start..$end] | ForEach-Object {
                    [pscustomobject]@{ DestPath = $_; ServerPath = $pathLookup[$_]; TempFile = (Join-Path $tempDir ([Guid]::NewGuid().ToString('N'))) }
                })

                try {
                    Invoke-ParallelDownload -Connection $conn `
                        -Items @($batch | ForEach-Object { @{ ServerPath = $_.ServerPath; OutputPath = $_.TempFile } }) `
                        -Concurrency $downloadConcurrency
                }
                catch {
                    # Best effort: files that did not download are reported
                    # individually as errors in the loop below.
                    Write-MigrationLog " [$b] batch download error: $_" -Level WARN -LogFile $logFile
                }

                foreach ($hi in $batch) {
                    $compared++
                    $destPath = $hi.DestPath
                    $serverPath = $hi.ServerPath
                    $csvPath = & $toCsvField $destPath
                    try {
                        if (-not (Test-Path -LiteralPath $hi.TempFile)) { throw "TFVC content was not downloaded" }

                        if ($config.secretScanningEnabled) {
                            $wasCleaned = Invoke-SecretScanAndClean -FilePath $hi.TempFile -Patterns $config.secretPatterns -ReplacementToken $config.secretReplacementToken
                            if ($wasCleaned) {
                                [void]$cleanedSecrets.Add("${b}:$destPath")
                            }
                        }

                        # -LiteralPath: with -Path, names containing [ ] * ? are
                        # treated as wildcard patterns and fail to resolve.
                        $tfvcHash = (Get-FileHash -LiteralPath $hi.TempFile -Algorithm SHA256).Hash
                        $gitHash  = (Get-FileHash -LiteralPath (Join-Path $repoPath $destPath) -Algorithm SHA256).Hash
                        $isMatch  = $tfvcHash -eq $gitHash
                        if ($isMatch) {
                            $matched++
                        } elseif ($redactedFilesSet.Contains("${b}:${serverPath}")) {
                            # Known redaction: a differing hash is expected and counts as matched.
                            $isMatch = 'Redacted'
                            $matched++
                        } else {
                            $mismatches.Add(@{ path = "${b}:$destPath"; tfvcHash = $tfvcHash; gitHash = $gitHash })
                        }
                        $hashRows.Add("$csvBranch,$csvPath,$tfvcHash,$gitHash,$isMatch")
                    }
                    catch {
                        Write-MigrationLog " Error hashing [$b] ${destPath}: $_" -Level ERROR -LogFile $logFile
                        $hashRows.Add("$csvBranch,$csvPath,ERROR,ERROR,False")
                        $mismatches.Add(@{ path = "${b}:$destPath"; tfvcHash = 'ERROR'; gitHash = 'ERROR' })
                    }
                    finally {
                        if (Test-Path -LiteralPath $hi.TempFile) { Remove-Item -LiteralPath $hi.TempFile -Force -ErrorAction SilentlyContinue }
                    }
                }
            }

            $perBranch.Add([ordered]@{
                branch     = $b
                exists     = $branchExists
                tfvcFiles  = $tfvcFiles.Count
                gitFiles   = $gitFiles.Count
                matched    = $inBoth.Count
                onlyInTfvc = $onlyInTfvc.Count
                onlyInGit  = $onlyInGit.Count
            })
        }

        $inventoryDiff = @{
            onlyInTfvc = @($allOnlyInTfvc)
            onlyInGit  = @($allOnlyInGit)
            perBranch  = @($perBranch)
        }
        $inventoryDiff | ConvertTo-Json -Depth 5 | Set-Content (Join-Path $verifyDir 'inventory-diff.json') -Encoding UTF8
        $hashRows | Set-Content (Join-Path $verifyDir 'hash-comparison.csv') -Encoding UTF8

        if (Test-Path $tempDir) { Remove-Item $tempDir -Recurse -Force -ErrorAction SilentlyContinue }
        Write-MigrationLog " Total compared: $compared Matched: $matched Mismatched: $($mismatches.Count)" -LogFile $logFile

        # -- Pass 3 - Changeset Coverage (per branch, then aggregate) -----
        Write-MigrationLog "Pass 3: changeset coverage" -LogFile $logFile

        $changesetsFile = Join-Path $outputDir 'changesets.json'
        $exportData = Get-Content $changesetsFile -Raw | ConvertFrom-Json
        $exportedIds = [System.Collections.Generic.HashSet[int]]::new()
        foreach ($cs in $exportData.changesets) { [void]$exportedIds.Add([int]$cs.changesetId) }

        $mappedIds       = [System.Collections.Generic.HashSet[int]]::new()
        $orphanedCommits = [System.Collections.Generic.List[string]]::new()
        $mappingRows     = [System.Collections.Generic.List[string]]::new()
        $mappingRows.Add("ChangesetId,Branch,GitCommitHash,Author,Date,Comment")
        $totalMappedCommits = 0

        foreach ($b in $existingBranches) {
            $gitLogOutput = Invoke-Git -C $repoPath log $b --format="COMMIT_START|||%H|||%an|||%ai|||%s|||%b" 2>&1
            if ($LASTEXITCODE -ne 0) { throw "git log failed on '$b': $gitLogOutput" }

            # A List avoids the array copy that '+=' performs for every commit.
            $commits = [System.Collections.Generic.List[object]]::new()
            $currentCommit = $null

            foreach ($line in ($gitLogOutput -split "`n")) {
                $line = $line.Trim()
                if (-not $line) { continue }

                if ($line -match '^COMMIT_START\|\|\|') {
                    if ($currentCommit) { $commits.Add($currentCommit) }
                    $parts  = $line -split '\|\|\|', 6
                    $currentCommit = @{
                        hash   = $parts[1].Trim()
                        author = $parts[2].Trim()
                        date   = $parts[3].Trim()
                        subj   = if ($parts.Count -gt 4) { $parts[4].Trim() } else { '' }
                        body   = if ($parts.Count -gt 5) { $parts[5].Trim() } else { '' }
                    }
                } elseif ($currentCommit) {
                    # Continuation line of a multi-line commit body.
                    $currentCommit.body += "`n" + $line
                }
            }
            if ($currentCommit) { $commits.Add($currentCommit) }

            $csvBranch = & $toCsvField $b
            foreach ($c in $commits) {
                $hash   = $c.hash
                $author = $c.author
                $date   = $c.date
                $subj   = $c.subj
                $body   = $c.body

                # Commits tfvc2git generated itself (e.g. the .gitignore commit)
                # are not changesets and must not count as orphaned.
                if ($body -match 'Tfvc2Git-Generated' -or $subj -match 'Tfvc2Git-Generated') { continue }

                $csId = $null
                if ($body -match 'TFVC-Changeset:\s*(\d+)') { $csId = [int]$Matches[1] }
                elseif ($subj -match 'TFVC-Changeset:\s*(\d+)') { $csId = [int]$Matches[1] }

                # Explicit null test: a parsed id is mapped regardless of its value.
                if ($null -ne $csId) {
                    [void]$mappedIds.Add($csId)
                    $totalMappedCommits++
                    $safeComment = $subj -replace '"', '""'
                    # Author display names often contain commas ("Last, First").
                    $csvAuthor = & $toCsvField $author
                    $mappingRows.Add("$csId,$csvBranch,$hash,$csvAuthor,$date,`"$safeComment`"")
                }
                else {
                    $orphanedCommits.Add($hash)
                }
            }
        }

        $unmappedCs = @($exportedIds | Where-Object { -not $mappedIds.Contains($_) })
        foreach ($csId in $unmappedCs) { $mappingRows.Add("$csId,,,,,") }
        $mappingRows | Set-Content (Join-Path $verifyDir 'changeset-mapping.csv') -Encoding UTF8

        Write-MigrationLog " Exported changesets: $($exportedIds.Count) Mapped commits: $totalMappedCommits Unmapped: $($unmappedCs.Count) Orphaned: $($orphanedCommits.Count)" -LogFile $logFile

        # --- Pass 4: Remote push verification ---
        if ($Push) {
            Write-MigrationLog "Pushing to remote ($($config.gitRemoteUrl))..." -LogFile $logFile
            # Push failures are not checked here; they surface below as
            # branches whose local and origin hashes differ.
            Invoke-Git -C $repoPath push -u origin --all 2>&1 | Out-Null
            Invoke-Git -C $repoPath push --tags 2>&1 | Out-Null
        }

        $remoteResult = 'N/A'
        $remoteUnpushed = [System.Collections.Generic.List[string]]::new()
        $remoteUrlOut = Invoke-Git -C $repoPath config --get remote.origin.url 2>&1
        $remoteUrl = if ($LASTEXITCODE -eq 0 -and $remoteUrlOut) { ($remoteUrlOut | Select-Object -First 1).Trim() } else { '' }

        if ($remoteUrl) {
            Write-MigrationLog "Pass 4: remote push verification (origin)" -LogFile $logFile
            foreach ($b in $existingBranches) {
                $localHashOut = Invoke-Git -C $repoPath rev-parse $b 2>&1
                $localHash = if ($LASTEXITCODE -eq 0 -and $localHashOut) { ($localHashOut | Select-Object -First 1).Trim() } else { '' }

                $remoteHashOut = Invoke-Git -C $repoPath rev-parse "origin/$b" 2>&1
                $remoteHash = if ($LASTEXITCODE -eq 0 -and $remoteHashOut) { ($remoteHashOut | Select-Object -First 1).Trim() } else { '' }

                if (-not $localHash -or $localHash -ne $remoteHash) {
                    $remoteUnpushed.Add($b)
                }
            }
            if ($remoteUnpushed.Count -eq 0) {
                Write-MigrationLog " All $($existingBranches.Count) branch(es) are up-to-date on remote." -LogFile $logFile
                $remoteResult = 'PASS'
            } else {
                Write-MigrationLog " [x] $($remoteUnpushed.Count) branch(es) NOT up-to-date on remote: $($remoteUnpushed -join ', ')" -LogFile $logFile
                $remoteResult = 'FAIL'
            }
        }

        # Leave the repo on the primary branch.
        Invoke-Git -C $repoPath checkout $primaryBranch 2>&1 | Out-Null

        # -- Summary ------------------------------------------------------
        $invResult  = if ($allOnlyInTfvc.Count -eq 0 -and $allOnlyInGit.Count -eq 0) { 'PASS' } else { 'FAIL' }
        $hashResult = if ($mismatches.Count -eq 0) { 'PASS' } else { 'FAIL' }
        $csResult   = if ($unmappedCs.Count -eq 0 -and $orphanedCommits.Count -eq 0) { 'PASS' } else { 'FAIL' }
        $overall    = if ($invResult -eq 'PASS' -and $hashResult -eq 'PASS' -and $csResult -eq 'PASS' -and ($remoteResult -eq 'PASS' -or $remoteResult -eq 'N/A')) { 'PASS' } else { 'FAIL' }

        $summary = [ordered]@{
            verificationDate = (Get-Date).ToString('o')
            overallResult    = $overall
            branches         = @($branches)
            perBranch        = @($perBranch)
            inventoryCheck   = [ordered]@{
                result         = $invResult
                totalTfvcFiles = $totalTfvc
                totalGitFiles  = $totalGit
                onlyInTfvc     = @($allOnlyInTfvc)
                onlyInGit      = @($allOnlyInGit)
                cleanedOrphans = @($cleanedOrphans)
                matchCount     = $totalInBoth
            }
            hashCheck        = [ordered]@{
                result         = $hashResult
                totalCompared  = $compared
                matched        = $matched
                mismatched     = $mismatches.Count
                cleanedSecrets = @($cleanedSecrets)
                mismatches     = @($mismatches | ForEach-Object { [ordered]@{ path = $_.path; tfvcHash = $_.tfvcHash; gitHash = $_.gitHash } })
            }
            changesetCoverage = [ordered]@{
                result                  = $csResult
                totalExportedChangesets = $exportedIds.Count
                totalMappedCommits      = $totalMappedCommits
                unmappedChangesets      = @($unmappedCs)
                orphanedCommits         = @($orphanedCommits)
            }
            remotePushVerification = [ordered]@{
                result             = $remoteResult
                remoteUrl          = $remoteUrl
                unpushedBranches   = @($remoteUnpushed)
            }
        }

        $summary | ConvertTo-Json -Depth 10 | Set-Content (Join-Path $verifyDir 'summary.json') -Encoding UTF8

        Write-MigrationLog "-------------------------------" -LogFile $logFile
        Write-MigrationLog "=== Verification complete ===" -LogFile $logFile
        Write-MigrationLog "Overall result: $overall" -LogFile $logFile
        Write-MigrationLog " Inventory: $invResult"  -LogFile $logFile
        Write-MigrationLog " Hashes: $hashResult" -LogFile $logFile
        Write-MigrationLog " Changesets: $csResult"   -LogFile $logFile
        if ($remoteResult -ne 'N/A') { Write-MigrationLog " RemotePush: $remoteResult" -LogFile $logFile }
        Write-MigrationLog "Results written to: $verifyDir" -LogFile $logFile
    }
    catch {
        # $logFile is only set once bootstrap has resolved the output directory.
        if ($logFile) {
            Write-MigrationLog "FATAL: $_" -Level ERROR -LogFile $logFile
            Write-MigrationLog $_.ScriptStackTrace -Level ERROR -LogFile $logFile
        }
        throw
    }
}