Public/Invoke-TfvcReplay.psm1
|
function Invoke-TfvcReplay {
    <#
    .SYNOPSIS
        Replays exported TFVC changesets as Git commits, one branch per mapping.

    .DESCRIPTION
        Reads changesets.json produced by Export-TfvcChangeset, downloads file content
        from TFVC at each changeset version, and creates a corresponding Git commit
        preserving author, date, comment, and work-item links.

        Each source mapping targets a Git branch (default 'main'). Branches are built
        as independent histories: for each branch, only the changesets that touch that
        branch's folder are replayed, in order. A changeset touching folders for two
        branches produces a commit on each.

        Supports checkpoint/resume and optional push of all branches. A checkpoint is
        written every 50 commits and whenever a branch completes. On resume, the
        in-progress branch is reset to the commit recorded in the checkpoint so that
        changesets committed after the last checkpoint are not replayed twice.

    .PARAMETER ConfigPath
        Path to the migration config.json file (or a directory containing it).
        Defaults to ./config.json.

    .PARAMETER Resume
        Resume replay from the last replay-checkpoint.json.

    .PARAMETER Push
        Push all branches to the configured remote after replay completes.

    .EXAMPLE
        Invoke-TfvcReplay -ConfigPath ./config.json -Push
    #>
    [CmdletBinding()]
    param(
        [string]$ConfigPath = "./config.json",
        [switch]$Resume,
        [switch]$Push
    )

    Set-StrictMode -Version Latest
    $ErrorActionPreference = 'Stop'

    # --- Helper: read an optional property without tripping strict mode ---
    # Under Set-StrictMode -Version Latest, reading a property that does not exist on
    # a deserialized JSON object throws. Optional config/export keys go through here.
    function Get-OptionalProperty {
        param(
            $InputObject,
            [string]$Name,
            $Default = $null
        )

        if ($null -eq $InputObject) { return , $Default }
        $prop = $InputObject.psobject.Properties[$Name]
        if ($null -eq $prop) { return , $Default }
        # The unary comma stops the pipeline from unrolling array values.
        return , $prop.Value
    }

    # BOM-less UTF-8 for files handed to git. [System.Text.Encoding]::UTF8 emits a
    # byte-order mark, which would otherwise end up at the start of commit-message files.
    $utf8NoBom = [System.Text.UTF8Encoding]::new($false)

    # --- Bootstrap ---
    if (-not $ConfigPath) { $ConfigPath = "./config.json" }
    if (Test-Path -LiteralPath $ConfigPath -PathType Container) {
        $ConfigPath = Join-Path $ConfigPath 'config.json'
    }
    $config = Get-Content -Path $ConfigPath -Raw | ConvertFrom-Json

    $outputDir      = $ExecutionContext.SessionState.Path.GetUnresolvedProviderPathFromPSPath($config.outputDir)
    $logFile        = Join-Path $outputDir 'migration-log.txt'
    $checkpointFile = Join-Path $outputDir 'replay-checkpoint.json'
    $changesetsFile = Join-Path $outputDir 'changesets.json'

    if (-not (Test-Path $changesetsFile)) {
        throw "Exported changesets not found at: $changesetsFile`nRun 'tfvc2git export' (or 'tfvc2git -DryRun') first."
    }

    $export     = Get-Content -Path $changesetsFile -Raw | ConvertFrom-Json
    $changesets = $export.changesets

    # Force an array: a single branch would otherwise come back as a scalar string and
    # a later '+=' would concatenate strings instead of appending a branch.
    $branches      = @(Get-ConfigBranches -SourceMappings $config.sourceMappings)
    $primaryBranch = Get-PrimaryBranch -SourceMappings $config.sourceMappings

    # How many file downloads to run concurrently per changeset (config-tunable).
    $configuredConcurrency = Get-OptionalProperty -InputObject $config -Name 'downloadConcurrency'
    $downloadConcurrency   = if ($configuredConcurrency) { [int]$configuredConcurrency } else { 8 }

    # Add a Visual Studio .gitignore to each branch (after its history) unless disabled.
    # A present-but-null value counts as "disabled", so test for presence explicitly.
    $addGitignoreProp = $config.psobject.Properties['addGitignore']
    $addGitignore     = if ($null -ne $addGitignoreProp) { [bool]$addGitignoreProp.Value } else { $true }

    # Optional settings, resolved once so nested helpers never touch missing properties.
    $apiVersion = Get-OptionalProperty -InputObject $config -Name 'apiVersion'
    if (-not $apiVersion) { $apiVersion = '7.0' }
    $gitRemoteUrl           = Get-OptionalProperty -InputObject $config -Name 'gitRemoteUrl'
    $secretScanningEnabled  = Get-OptionalProperty -InputObject $config -Name 'secretScanningEnabled'
    $secretPatterns         = Get-OptionalProperty -InputObject $config -Name 'secretPatterns'
    $secretReplacementToken = Get-OptionalProperty -InputObject $config -Name 'secretReplacementToken'

    Write-MigrationLog -Message "=== Git Replay started ===" -LogFile $logFile
    Write-MigrationLog -Message "Total changesets in export: $($changesets.Count)" -LogFile $logFile
    Write-MigrationLog -Message "Download concurrency: $downloadConcurrency" -LogFile $logFile
    Write-MigrationLog -Message "Target branches: $($branches -join ', ') (primary: $primaryBranch)" -LogFile $logFile

    # --- TFVC connection (for downloading files) ---
    $connArgs = @{
        ServerUrl  = $config.adoServerUrl
        Collection = $config.collection
        Project    = $config.project
        Pat        = $config.pat
        ApiVersion = $apiVersion
    }
    $conn = New-TfvcConnection @connArgs

    # --- Git repo setup ---
    $repoPath = Join-Path $outputDir 'git-repo'
    if (-not (Test-Path (Join-Path $repoPath '.git'))) {
        Write-MigrationLog -Message "Initialising Git repo at $repoPath" -LogFile $logFile
        git init $repoPath
        if ($LASTEXITCODE -ne 0) {
            throw "git init failed for $repoPath (exit $LASTEXITCODE)"
        }
        Invoke-Git -C $repoPath config core.autocrlf false
        Invoke-Git -C $repoPath config core.safecrlf false
        # Windows MAX_PATH (260) otherwise makes 'git add' fail on deep .NET paths
        # (obj/, .vs/, generated files), which silently produced empty commits.
        Invoke-Git -C $repoPath config core.longpaths true

        if ($gitRemoteUrl) {
            Invoke-Git -C $repoPath remote add origin $gitRemoteUrl
            Write-MigrationLog -Message "Remote 'origin' set to $gitRemoteUrl" -LogFile $logFile
        }
    }

    # --- LFS availability check ---
    $lfsAvailable = $false
    try {
        $null = Invoke-Git lfs version 2>&1
        if ($LASTEXITCODE -eq 0) {
            Invoke-Git -C $repoPath lfs install --local 2>&1 | Out-Null
            $lfsAvailable = $true
        }
    }
    catch {
        # Any failure while probing or installing means LFS must not be relied on.
        $lfsAvailable = $false
    }
    if ($lfsAvailable) {
        Write-MigrationLog -Message "Git LFS is available and initialised" -LogFile $logFile
    }
    else {
        Write-MigrationLog -Message "Git LFS not available - large files will be committed directly" -Level WARN -LogFile $logFile
    }

    $script:redactedSecrets = [System.Collections.Generic.List[object]]::new()

    # --- LFS helpers ---
    $lfsThreshold = Get-OptionalProperty -InputObject $config -Name 'lfsThresholdBytes'
    if (-not $lfsThreshold) { $lfsThreshold = 0 }

    $lfsPatterns    = @()
    $lfsPatternsRaw = Get-OptionalProperty -InputObject $config -Name 'lfsPatterns'
    if ($lfsPatternsRaw) { $lfsPatterns = @($lfsPatternsRaw) }

    # Tracking set for patterns already in the current branch's .gitattributes.
    $trackedLfsPatterns = [System.Collections.Generic.HashSet[string]]::new([StringComparer]::OrdinalIgnoreCase)
    $gitattributes = Join-Path $repoPath '.gitattributes'

    function Reset-LfsTracking {
        # Re-read .gitattributes for the branch we're currently on (empty after a
        # fresh orphan checkout), so LFS tracking is per-branch.
        $trackedLfsPatterns.Clear()
        if (Test-Path -LiteralPath $gitattributes) {
            Get-Content -LiteralPath $gitattributes | ForEach-Object {
                if ($_ -match '^\s*(\S+)\s+filter=lfs') {
                    $trackedLfsPatterns.Add($Matches[1]) | Out-Null
                }
            }
        }
    }

    # Returns $true when a file should go through LFS: either it meets the size
    # threshold (when one is configured) or its extension matches a configured pattern.
    function Test-NeedsLfs {
        param(
            [string]$FilePath,
            [long]$SizeBytes
        )

        if ($lfsThreshold -gt 0 -and $SizeBytes -ge $lfsThreshold) { return $true }

        $ext = [System.IO.Path]::GetExtension($FilePath)
        foreach ($pattern in $lfsPatterns) {
            # Patterns are compared by extension only: '*.psd' -> '.psd'.
            $patExt = $pattern.TrimStart('*')
            if ($ext -eq $patExt) { return $true }
        }
        return $false
    }

    # Registers a pattern with 'git lfs track' once per branch (no-op if already tracked).
    function Add-LfsTracking {
        param([string]$Pattern)

        if ($trackedLfsPatterns.Contains($Pattern)) { return }
        $trackedLfsPatterns.Add($Pattern) | Out-Null

        if ($lfsAvailable) {
            Push-Location $repoPath
            try {
                Invoke-Git lfs track $Pattern 2>&1 | Out-Null
            }
            finally {
                Pop-Location
            }
            Write-MigrationLog -Message "LFS tracking added for: $Pattern" -LogFile $logFile
        }
        else {
            # git-lfs is NOT installed: commit these files directly. Do NOT write a
            # 'filter=lfs' entry to .gitattributes - git would then try to run the
            # missing git-lfs clean filter on 'git add' and fail, which silently
            # produces empty commits and leaves files untracked. Install git-lfs
            # before migrating if you want large files stored via LFS.
            Write-MigrationLog -Message "git-lfs not available - committing '$Pattern' files directly (no LFS)." -Level WARN -LogFile $logFile
        }
    }

    # --- Helper: remove file and empty parent dirs ---
    # -LiteralPath is used throughout so file names containing '[' or ']' are not
    # interpreted as wildcard patterns.
    function Remove-FileAndEmptyParents {
        param([string]$FilePath)

        if (Test-Path -LiteralPath $FilePath) {
            Remove-Item -LiteralPath $FilePath -Force
        }

        # Walk upwards, deleting directories that became empty, stopping at the repo root.
        $dir = Split-Path $FilePath -Parent
        while ($dir -and $dir -ne $repoPath -and (Test-Path -LiteralPath $dir)) {
            $children = @(Get-ChildItem -LiteralPath $dir -Force)
            if ($children.Count -ne 0) { break }
            Remove-Item -LiteralPath $dir -Force
            $dir = Split-Path $dir -Parent
        }
    }

    # --- Helper: the target branch of a single change (default 'main') ---
    function Get-ChangeBranch {
        param($Change)

        $branchName = Get-OptionalProperty -InputObject $Change -Name 'branch'
        if ($branchName) { return "$branchName" }
        return 'main'
    }

    # --- Helper: start a fresh, empty branch (orphan, or based on a parent branch) ---
    function Start-GitBranch {
        param(
            [string]$Branch,
            [string]$ParentBranch
        )

        # If any commit exists, detach so we can (re)create the branch from nothing.
        Invoke-Git -C $repoPath rev-parse --verify -q HEAD > $null 2>&1
        if ($LASTEXITCODE -eq 0) {
            Invoke-Git -C $repoPath checkout --detach 2>&1 | Out-Null
            Invoke-Git -C $repoPath branch -D $Branch 2>&1 | Out-Null
        }

        if ($ParentBranch) {
            Write-MigrationLog -Message "Basing branch '$Branch' on parent '$ParentBranch'" -LogFile $logFile
            Invoke-Git -C $repoPath checkout -b $Branch $ParentBranch 2>&1 | Out-Null
        }
        else {
            Invoke-Git -C $repoPath checkout --orphan $Branch 2>&1 | Out-Null
        }

        Invoke-Git -C $repoPath read-tree --empty 2>&1 | Out-Null

        # Physically clear the working tree (except .git) so the branch starts empty.
        Get-ChildItem -LiteralPath $repoPath -Force |
            Where-Object { $_.Name -ne '.git' } |
            Remove-Item -Recurse -Force -ErrorAction SilentlyContinue
    }

    # --- Helper: persist checkpoint ---
    # Records the current HEAD hash alongside the progress counters; the hash is what
    # a later -Resume resets the in-progress branch to.
    function Save-ReplayCheckpoint {
        param(
            [string[]]$CompletedBranches,
            [string]$CurrentBranch,
            [int]$LastChangesetId,
            [int]$TotalReplayed
        )

        $lastHash = (Invoke-Git -C $repoPath rev-parse HEAD 2>&1).Trim()
        @{
            completedBranches = @($CompletedBranches)
            currentBranch     = $CurrentBranch
            lastChangesetId   = $LastChangesetId
            lastCommitHash    = $lastHash
            totalReplayed     = $TotalReplayed
        } | ConvertTo-Json | Set-Content -Path $checkpointFile -Encoding UTF8
    }

    # --- Helper: apply one changeset's changes and commit on the current branch ---
    # $Branch is only used for the redacted-secrets report.
    function Write-ChangesetCommit {
        param(
            $Changeset,
            $Changes,
            [string]$Branch
        )

        $cs = $Changeset

        # Pass 1: apply filesystem ops (deletes, rename-old removal) and collect
        # the file downloads so they can run concurrently.
        $downloads = [System.Collections.Generic.List[object]]::new()
        foreach ($change in $Changes) {
            $destFile = Join-Path $repoPath $change.destinationPath
            switch ($change.changeType) {
                { $_ -in 'add', 'edit', 'branch', 'merge', 'undelete' } {
                    $downloads.Add(@{ ServerPath = $change.serverPath; OutputPath = $destFile; ChangesetVersion = $cs.changesetId })
                }
                'delete' {
                    Remove-FileAndEmptyParents -FilePath $destFile
                }
                'rename' {
                    $sourceServerPath = Get-OptionalProperty -InputObject $change -Name 'sourceServerPath'
                    if ($sourceServerPath) {
                        # Remove the old location using the first mapping that resolves it.
                        foreach ($m in $config.sourceMappings) {
                            $destPrefix = Get-OptionalProperty -InputObject $m -Name 'destinationPath'
                            if (-not $destPrefix) { $destPrefix = '' }
                            $oldDest = ConvertTo-RelativePath -ServerPath $sourceServerPath -TfvcBase $m.tfvcPath -DestinationPrefix $destPrefix
                            if ($oldDest) {
                                Remove-FileAndEmptyParents -FilePath (Join-Path $repoPath $oldDest)
                                break
                            }
                        }
                    }
                    $downloads.Add(@{ ServerPath = $change.serverPath; OutputPath = $destFile; ChangesetVersion = $cs.changesetId })
                }
            }
        }

        # Pass 2: download this changeset's files concurrently.
        if ($downloads.Count -gt 0) {
            Invoke-ParallelDownload -Connection $conn -Items $downloads.ToArray() -Concurrency $downloadConcurrency
        }

        # Pass 2.5: Secret Scanning
        if ($secretScanningEnabled) {
            foreach ($d in $downloads) {
                if (-not (Test-Path -LiteralPath $d.OutputPath)) { continue }
                $wasCleaned = Invoke-SecretScanAndClean -FilePath $d.OutputPath -Patterns $secretPatterns -ReplacementToken $secretReplacementToken
                if ($wasCleaned) {
                    Write-MigrationLog -Message "Secret redacted in $($d.ServerPath) at Changeset $($cs.changesetId)" -Level WARN -LogFile $logFile
                    $script:redactedSecrets.Add(@{
                            ChangesetId = $cs.changesetId
                            ServerPath  = $d.ServerPath
                            Branch      = $Branch
                        })
                }
            }
        }

        # Pass 3: LFS tracking for any downloaded file that needs it.
        foreach ($d in $downloads) {
            if (-not (Test-Path -LiteralPath $d.OutputPath)) { continue }
            $fileSize = (Get-Item -LiteralPath $d.OutputPath).Length
            if (Test-NeedsLfs -FilePath $d.OutputPath -SizeBytes $fileSize) {
                $ext = [System.IO.Path]::GetExtension($d.OutputPath)
                if ($ext) { Add-LfsTracking -Pattern "*$ext" }
            }
        }

        $addOut = Invoke-Git -C $repoPath add -A 2>&1
        if ($LASTEXITCODE -ne 0) {
            throw "git add failed for changeset $($cs.changesetId) (exit $LASTEXITCODE): $addOut"
        }

        # Diagnostic: a changeset that downloaded files but stages nothing means git
        # silently skipped them (path length, etc.) - surface it instead of an empty commit.
        if ($downloads.Count -gt 0) {
            Invoke-Git -C $repoPath diff --cached --quiet | Out-Null
            if ($LASTEXITCODE -eq 0) {
                Write-MigrationLog -Message "Changeset $($cs.changesetId): $($downloads.Count) file(s) downloaded but nothing staged (git skipped them - check path length)." -Level WARN -LogFile $logFile
            }
        }

        # Commit message: original comment followed by a TFVC metadata trailer.
        $comment = Get-OptionalProperty -InputObject $cs -Name 'comment'
        $body    = if ($comment) { $comment } else { '' }

        $trailer = "`n---"
        $trailer += "`nTFVC-Changeset: $($cs.changesetId)"
        $trailer += "`nTFVC-Author: $($cs.author)"
        $trailer += "`nTFVC-Date: $($cs.createdDate)"

        $workItems = Get-OptionalProperty -InputObject $cs -Name 'workItems'
        if ($workItems -and @($workItems).Count -gt 0) {
            $wiRefs = ($workItems | ForEach-Object { "#$($_.id)" }) -join ', '
            $trailer += "`nTFVC-WorkItems: $wiRefs"
        }

        $commitMsg   = "$body$trailer"
        $tempMsgFile = Join-Path $outputDir "commit-msg-$($cs.changesetId).tmp"
        [System.IO.File]::WriteAllText($tempMsgFile, $commitMsg, $utf8NoBom)

        try {
            $env:GIT_AUTHOR_NAME  = $cs.author
            $env:GIT_AUTHOR_EMAIL = "$($cs.author)@tfvc.local"
            $env:GIT_AUTHOR_DATE  = $cs.createdDate
            # Set committer too (the repo has no user.name/email configured, so
            # without this 'git commit' can fail with "committer identity unknown").
            $env:GIT_COMMITTER_NAME  = $cs.author
            $env:GIT_COMMITTER_EMAIL = "$($cs.author)@tfvc.local"
            $env:GIT_COMMITTER_DATE  = $cs.createdDate

            $commitOut = Invoke-Git -C $repoPath commit -F $tempMsgFile --allow-empty 2>&1
            if ($LASTEXITCODE -ne 0) {
                throw "git commit failed for changeset $($cs.changesetId) (exit $LASTEXITCODE): $commitOut"
            }
        }
        finally {
            $env:GIT_AUTHOR_NAME     = $null
            $env:GIT_AUTHOR_EMAIL    = $null
            $env:GIT_AUTHOR_DATE     = $null
            $env:GIT_COMMITTER_NAME  = $null
            $env:GIT_COMMITTER_EMAIL = $null
            $env:GIT_COMMITTER_DATE  = $null
            # Clean up even when the commit throws, so failed runs leave no temp files.
            Remove-Item -LiteralPath $tempMsgFile -ErrorAction SilentlyContinue
        }
    }

    # --- Helper: add a Visual Studio .gitignore as a final commit on the branch ---
    function Add-GitignoreCommit {
        $giPath = Join-Path $repoPath '.gitignore'
        if (Test-Path -LiteralPath $giPath) { return }   # branch already has one (migrated from TFVC)

        [System.IO.File]::WriteAllText($giPath, (Get-VisualStudioGitignore), $utf8NoBom)

        $addOut = Invoke-Git -C $repoPath add -- .gitignore 2>&1
        if ($LASTEXITCODE -ne 0) {
            throw "git add .gitignore failed (exit $LASTEXITCODE): $addOut"
        }

        # Marker in the footer so verification doesn't flag this as an orphan commit.
        $msg = "Add .gitignore (Visual Studio template)`n`n---`nTfvc2Git-Generated: gitignore"
        $tmp = Join-Path $outputDir 'gitignore-msg.tmp'
        [System.IO.File]::WriteAllText($tmp, $msg, $utf8NoBom)

        try {
            $env:GIT_AUTHOR_NAME     = 'tfvc2git'
            $env:GIT_AUTHOR_EMAIL    = 'noreply@tfvc2git.local'
            $env:GIT_COMMITTER_NAME  = 'tfvc2git'
            $env:GIT_COMMITTER_EMAIL = 'noreply@tfvc2git.local'

            $commitOut = Invoke-Git -C $repoPath commit -F $tmp 2>&1
            if ($LASTEXITCODE -ne 0) {
                throw "git commit .gitignore failed (exit $LASTEXITCODE): $commitOut"
            }
        }
        finally {
            $env:GIT_AUTHOR_NAME     = $null
            $env:GIT_AUTHOR_EMAIL    = $null
            $env:GIT_COMMITTER_NAME  = $null
            $env:GIT_COMMITTER_EMAIL = $null
            Remove-Item -LiteralPath $tmp -ErrorAction SilentlyContinue
        }

        Write-MigrationLog -Message "Added Visual Studio .gitignore to branch" -LogFile $logFile
    }

    # --- Group changesets per branch (ascending order is preserved) ---
    $byBranch = [ordered]@{}
    foreach ($b in $branches) {
        $byBranch[$b] = [System.Collections.Generic.List[object]]::new()
    }

    foreach ($cs in $changesets) {
        # Split this changeset's changes by the branch each one targets.
        $grp = @{}
        foreach ($ch in $cs.changes) {
            $cb = Get-ChangeBranch -Change $ch
            if (-not $grp.ContainsKey($cb)) {
                $grp[$cb] = [System.Collections.Generic.List[object]]::new()
            }
            $grp[$cb].Add($ch)
        }

        if ($grp.Count -eq 0) {
            # Changeset touches nothing in scope - keep an empty commit on the
            # primary branch so every changeset still maps to a commit (audit).
            if (-not $byBranch.Contains($primaryBranch)) {
                $byBranch[$primaryBranch] = [System.Collections.Generic.List[object]]::new()
                $branches += $primaryBranch
            }
            $byBranch[$primaryBranch].Add([pscustomobject]@{ cs = $cs; changes = @() })
            continue
        }

        foreach ($cb in $grp.Keys) {
            if (-not $byBranch.Contains($cb)) {
                # Branch seen in the export but not declared in config: build it too.
                $byBranch[$cb] = [System.Collections.Generic.List[object]]::new()
                $branches += $cb
            }
            $byBranch[$cb].Add([pscustomobject]@{ cs = $cs; changes = @($grp[$cb]) })
        }
    }

    # --- Resume state ---
    $completedBranches   = @()
    $resumeCurrentBranch = ''
    $resumeAfterId       = 0
    $resumeCommitHash    = ''
    $totalReplayed       = 0

    if ($Resume -and (Test-Path $checkpointFile)) {
        $checkpoint = Get-Content $checkpointFile -Raw | ConvertFrom-Json

        $cpCompleted = Get-OptionalProperty -InputObject $checkpoint -Name 'completedBranches'
        if ($cpCompleted) { $completedBranches = @($cpCompleted) }

        if ($null -ne $checkpoint.psobject.Properties['currentBranch']) {
            $resumeCurrentBranch = "$($checkpoint.currentBranch)"
        }
        if ($null -ne $checkpoint.psobject.Properties['lastChangesetId']) {
            $resumeAfterId = [int]$checkpoint.lastChangesetId
        }
        if ($null -ne $checkpoint.psobject.Properties['lastCommitHash']) {
            $resumeCommitHash = "$($checkpoint.lastCommitHash)".Trim()
        }

        $cpTotal = Get-OptionalProperty -InputObject $checkpoint -Name 'totalReplayed'
        if ($cpTotal) { $totalReplayed = [int]$cpTotal }

        Write-MigrationLog -Message "Resuming: completed [$($completedBranches -join ', ')], current '$resumeCurrentBranch' after changeset $resumeAfterId" -LogFile $logFile
    }

    # --- Build each branch as an independent history ---
    foreach ($b in $branches) {
        if ($completedBranches -contains $b) {
            Write-MigrationLog -Message "Branch '$b' already complete - skipping" -LogFile $logFile
            continue
        }

        $branchItems = $byBranch[$b]
        if (-not $branchItems -or $branchItems.Count -eq 0) {
            Write-MigrationLog -Message "No changesets target branch '$b' - skipping" -LogFile $logFile
            $completedBranches += $b
            continue
        }

        $branchResumeAfterId = 0
        if ($Resume -and $b -eq $resumeCurrentBranch -and $resumeAfterId -gt 0) {
            Write-MigrationLog -Message "Resuming branch '$b' after changeset $resumeAfterId" -LogFile $logFile
            Invoke-Git -C $repoPath checkout $b 2>&1 | Out-Null

            # Checkpoints are only written every 50 commits, so the branch may hold
            # commits newer than the checkpoint (plus a half-applied working tree).
            # Rewind to the checkpointed commit so those changesets are not committed twice.
            if ($resumeCommitHash -match '^[0-9a-fA-F]{7,64}$') {
                $resetOut = Invoke-Git -C $repoPath reset --hard $resumeCommitHash 2>&1
                if ($LASTEXITCODE -ne 0) {
                    throw "git reset to checkpoint commit $resumeCommitHash failed on branch '$b' (exit $LASTEXITCODE): $resetOut"
                }
                Invoke-Git -C $repoPath clean -fd 2>&1 | Out-Null
                Write-MigrationLog -Message "Branch '$b' reset to checkpoint commit $resumeCommitHash" -LogFile $logFile
            }
            else {
                Write-MigrationLog -Message "Checkpoint has no usable lastCommitHash - resuming branch '$b' without reset (commits made after the checkpoint may be duplicated)" -Level WARN -LogFile $logFile
            }

            $branchResumeAfterId = $resumeAfterId
        }
        else {
            Write-MigrationLog -Message "Building branch '$b' ($($branchItems.Count) changeset(s))" -LogFile $logFile

            # Look up the optional parent branch from the first mapping that targets $b.
            $parentBranch = ''
            foreach ($m in $config.sourceMappings) {
                if ((Get-MappingBranch -Mapping $m) -eq $b) {
                    $parentBranch = Get-MappingParentBranch -Mapping $m
                    break
                }
            }
            Start-GitBranch -Branch $b -ParentBranch $parentBranch
        }

        Reset-LfsTracking

        $bi     = 0
        $bcount = $branchItems.Count
        foreach ($item in $branchItems) {
            $bi++
            $cs = $item.cs

            # On resume, skip everything up to and including the checkpointed changeset.
            if ($branchResumeAfterId -gt 0 -and $cs.changesetId -le $branchResumeAfterId) { continue }

            if ($bi % 50 -eq 0 -or $bi -eq 1 -or $bi -eq $bcount) {
                Write-MigrationLog -Message "  [$b] changeset $($cs.changesetId) ($bi / $bcount)" -LogFile $logFile
            }

            Write-ChangesetCommit -Changeset $cs -Changes $item.changes -Branch $b
            $totalReplayed++

            if ($totalReplayed % 50 -eq 0) {
                Save-ReplayCheckpoint -CompletedBranches $completedBranches -CurrentBranch $b -LastChangesetId $cs.changesetId -TotalReplayed $totalReplayed
            }
        }

        if ($addGitignore) { Add-GitignoreCommit }

        $completedBranches += $b
        Save-ReplayCheckpoint -CompletedBranches $completedBranches -CurrentBranch '' -LastChangesetId 0 -TotalReplayed $totalReplayed
        Write-MigrationLog -Message "Branch '$b' complete" -LogFile $logFile
    }

    # Leave the repo on the primary branch (its natural default).
    Invoke-Git -C $repoPath checkout $primaryBranch 2>&1 | Out-Null
    Write-MigrationLog -Message "Replay complete. $totalReplayed commits across $($branches.Count) branch(es)." -LogFile $logFile

    # --- Push (all branches) ---
    if ($Push) {
        Write-MigrationLog -Message "Pushing all branches to remote..." -LogFile $logFile
        $pushOut = Invoke-Git -C $repoPath push -u origin --all 2>&1
        if ($LASTEXITCODE -ne 0) {
            Write-MigrationLog -Message "Push failed: $pushOut" -Level ERROR -LogFile $logFile
            throw "Failed to push branches to remote 'origin' (exit code $LASTEXITCODE). See log for details: $pushOut"
        }
        Write-MigrationLog -Message "Push complete (branches: $($branches -join ', '))" -LogFile $logFile
    }

    Write-MigrationLog -Message "=== Git Replay finished ===" -LogFile $logFile

    # Report of every file that had a secret redacted during this run.
    if ($script:redactedSecrets.Count -gt 0) {
        $redactedSecretsFile = Join-Path $outputDir 'redacted-secrets.json'
        $script:redactedSecrets | ConvertTo-Json -Depth 5 | Set-Content $redactedSecretsFile -Encoding UTF8
        Write-MigrationLog -Message "Wrote redacted secrets report to $redactedSecretsFile" -LogFile $logFile
    }
}