# Private/ProcessingHelpers.ps1
<#
.SYNOPSIS
    ConvertVTTAssets ProcessingHelpers - Enterprise file processing and memory management

.DESCRIPTION
    Private module containing enterprise-scale processing functions for chunked file
    discovery, memory management, progress tracking, and performance optimization.
    Enables processing of 50,000+ files with controlled memory usage and comprehensive
    progress reporting.

.AUTHOR
    Andres Yuhnke, Claude (Anthropic)

.VERSION
    1.6.0

.DATE
    2025-08-24

.COPYRIGHT
    (c) 2025 Andres Yuhnke. MIT License.

.NOTES
    Private functions included:
    - Get-ChunkedFileDiscovery: Memory-efficient file discovery in configurable chunks
    - Initialize-ProcessingEnvironment: Setup and validation for enterprise operations
    - Invoke-FileProcessingChunk: Chunked processing with progress tracking
    - Invoke-SingleFileProcessing: Path mapping and target-name calculation for one file
    - Resolve-RenamedPathPrefix: Boundary-safe remapping of a path through renamed directories
    - Get-ProcessingStatistics: Performance metrics and operation statistics
    - Invoke-MemoryCleanup: Periodic garbage collection between chunks

    Enterprise capabilities:
    - Chunked processing for 50,000+ files without memory exhaustion
    - Progress estimation and real-time feedback
    - Memory management with garbage collection
    - Performance metrics and throughput tracking
#>

# [PROC-001] Memory-efficient chunked file discovery for enterprise scale
<#
.SYNOPSIS
    Discovers directories and files below a root and computes chunking information.

.PARAMETER Root
    Literal path of the directory to scan.

.PARAMETER Recurse
    Whether to descend into sub-directories (default: $true).

.PARAMETER ChunkSize
    Number of files per processing chunk. Must be at least 1.

.PARAMETER ExtensionFilter
    Optional script block handed to Where-Object to filter the discovered files.

.PARAMETER EnableProgressEstimation
    When set, TotalItemsEstimate holds the number of discovered items
    (directories plus unfiltered files); otherwise it is -1.

.OUTPUTS
    Hashtable with the keys Directories, Files, TotalFiles, TotalDirectories,
    ChunkSize, TotalFileChunks and TotalItemsEstimate.
#>
function Get-ChunkedFileDiscovery {
    param(
        [string]$Root,
        [bool]$Recurse = $true,
        [int]$ChunkSize = 1000,
        [scriptblock]$ExtensionFilter = $null,
        [switch]$EnableProgressEstimation
    )

    # A chunk size below 1 would otherwise surface later as a divide-by-zero
    # or a negative chunk count.
    if ($ChunkSize -lt 1) {
        throw "ChunkSize must be at least 1 (received: $ChunkSize)"
    }

    Write-Verbose "Starting chunked file discovery in: $Root"

    # [PROC-001.1] Discover directories first (must be processed sequentially).
    # Deepest directories sort first; both separator styles are counted so the
    # depth ordering does not depend on the platform's separator.
    Write-Verbose "Discovering directories..."
    $separators = [char[]]@('\', '/')
    $allDirectories = @(
        Get-ChildItem -LiteralPath $Root -Directory -Recurse:$Recurse |
            Sort-Object { $_.FullName.Split($separators).Count } -Descending
    )

    # [PROC-001.2] Discover files
    Write-Verbose "Discovering files..."
    $discoveredFiles = @(Get-ChildItem -LiteralPath $Root -File -Recurse:$Recurse)

    # [PROC-001.3] Item estimate for progress tracking. Directories plus
    # unfiltered files equals the count of a full enumeration, so no additional
    # pass over the tree is needed.
    $totalItemsEstimate = -1
    if ($EnableProgressEstimation) {
        $totalItemsEstimate = $allDirectories.Count + $discoveredFiles.Count
        Write-Verbose "Estimated total items for progress tracking: $totalItemsEstimate"
    }

    # [PROC-001.4] Apply extension filtering if provided
    $allFiles = $discoveredFiles
    if ($ExtensionFilter) {
        $allFiles = @($discoveredFiles | Where-Object $ExtensionFilter)
    }

    # [PROC-001.5] Calculate chunk information
    $totalFiles = $allFiles.Count
    $totalFileChunks = [int][math]::Ceiling($totalFiles / $ChunkSize)

    Write-Verbose "Discovered $($allDirectories.Count) directories and $totalFiles files"
    Write-Verbose "Files will be processed in $totalFileChunks chunks of $ChunkSize"

    # [PROC-001.6] Return comprehensive discovery results
    return @{
        Directories        = $allDirectories
        Files              = $allFiles
        TotalFiles         = $totalFiles
        TotalDirectories   = $allDirectories.Count
        ChunkSize          = $ChunkSize
        TotalFileChunks    = $totalFileChunks
        TotalItemsEstimate = $totalItemsEstimate
    }
}

# [PROC-002] Initialize processing environment with validation and setup
<#
.SYNOPSIS
    Validates the root path and builds the mutable processing context hashtable.

.PARAMETER Root
    Literal path that must exist; a missing path throws "Path not found: <Root>".

.PARAMETER OutputRoot
    Optional destination root. When non-blank, UseOutputRoot is $true and both
    roots are resolved to full paths.

.PARAMETER Settings
    Settings hashtable stored unchanged in the context.

.PARAMETER GenerateReport
    When set, a missing OutputRoot is NOT created on disk.

.OUTPUTS
    Hashtable holding the processing context (paths, settings, collections, counters).
#>
function Initialize-ProcessingEnvironment {
    param(
        [string]$Root,
        [string]$OutputRoot = $null,
        [hashtable]$Settings,
        [switch]$GenerateReport
    )

    # [PROC-002.1] Validate root path exists
    if (-not (Test-Path -LiteralPath $Root)) {
        throw "Path not found: $Root"
    }

    # [PROC-002.2] Handle OutputRoot configuration if specified
    $useOutputRoot = -not [string]::IsNullOrWhiteSpace($OutputRoot)
    $outputRootFull = $null
    $rootFull = $null

    if ($useOutputRoot) {
        if (-not (Test-Path -LiteralPath $OutputRoot) -and -not $GenerateReport) {
            New-Item -ItemType Directory -Force -Path $OutputRoot | Out-Null
        }

        if (Test-Path -LiteralPath $OutputRoot) {
            $outputRootFull = (Resolve-Path -LiteralPath $OutputRoot).Path
        }
        else {
            # Report mode leaves a missing OutputRoot uncreated and Resolve-Path
            # only works on existing paths, so compute the full path without
            # touching the disk.
            $outputRootFull = $ExecutionContext.SessionState.Path.GetUnresolvedProviderPathFromPSPath($OutputRoot)
        }

        $rootFull = (Resolve-Path -LiteralPath $Root).Path
    }

    # [PROC-002.3] Initialize processing collections
    $processingContext = @{
        Root             = $Root
        OutputRoot       = $OutputRoot
        OutputRootFull   = $outputRootFull
        RootFull         = $rootFull
        UseOutputRoot    = $useOutputRoot
        Settings         = $Settings
        GenerateReport   = $GenerateReport
        RenameOperations = @()
        Errors           = @()
        Skipped          = @()
        RenamedPaths     = @{}
        OperationId      = 0
        AnalysisItems    = @()
        ProcessedItems   = 0
    }

    return $processingContext
}

# [PROC-003] Process a single chunk of files with comprehensive tracking
<#
.SYNOPSIS
    Processes one chunk of files, either in parallel or sequentially.

.PARAMETER FileChunk
    Files belonging to this chunk.

.PARAMETER Context
    Processing context hashtable. OperationId is updated in both modes;
    ProcessedItems is updated in sequential mode only. Context.TotalFiles is
    read for the per-file progress display.

.PARAMETER ChunkIndex
    Zero-based index of this chunk.

.PARAMETER TotalChunks
    Total number of chunks (progress display only).

.PARAMETER UseParallel
    Delegate the chunk to Invoke-FileNameOptimizationParallel instead of the
    sequential per-file loop.

.OUTPUTS
    The per-file results. Sequential mode emits one entry per file, which is
    $null for files skipped by Invoke-SingleFileProcessing.
#>
function Invoke-FileProcessingChunk {
    param(
        [System.IO.FileInfo[]]$FileChunk,
        [hashtable]$Context,
        [int]$ChunkIndex,
        [int]$TotalChunks,
        [switch]$UseParallel
    )

    Write-Verbose "Processing chunk $($ChunkIndex + 1)/$TotalChunks with $($FileChunk.Count) files"

    $showProgress = -not $Context.Settings.Silent -and -not $Context.GenerateReport

    # [PROC-003.1] Display chunk progress if not silent
    if ($showProgress) {
        # Guard the division: a zero chunk total must not abort processing.
        $chunkProgress = 0
        if ($TotalChunks -gt 0) {
            $chunkProgress = [math]::Round((($ChunkIndex + 1) / $TotalChunks) * 100, 0)
        }
        Write-Host "Processing file chunk $($ChunkIndex + 1)/$TotalChunks ($chunkProgress%)" -ForegroundColor Cyan
    }

    # [PROC-003.2] Choose processing method based on parallel preference
    if ($UseParallel) {
        # [PROC-003.3] Settings forwarded to the parallel engine
        $parallelSettings = @{
            RemoveMetadata      = $Context.Settings.RemoveMetadata
            SpaceReplacement    = $Context.Settings.SpaceReplacement
            LowercaseExtensions = $Context.Settings.LowercaseExtensions
            PreserveCase        = $Context.Settings.PreserveCase
            ExpandAmpersand     = $Context.Settings.ExpandAmpersand
            Force               = $Context.Settings.Force
            ThrottleLimit       = $Context.Settings.ThrottleLimit
            VerbosePreference   = $VerbosePreference
            WhatIfPreference    = $WhatIfPreference
        }

        # [PROC-003.4] Execute parallel processing. [ref] wraps a copy of the
        # counter, so the updated value is written back to the context afterwards.
        $operationIdRef = [ref]$Context.OperationId
        $parallelResults = Invoke-FileNameOptimizationParallel -Files $FileChunk -Settings $parallelSettings -RenamedPaths $Context.RenamedPaths -OperationId $operationIdRef -OutputRoot $Context.OutputRoot -RootFull $Context.RootFull
        $Context.OperationId = $operationIdRef.Value

        return $parallelResults
    }

    # [PROC-003.5] Sequential processing with detailed progress tracking.
    # A list avoids re-allocating the result array for every processed file.
    $chunkResults = New-Object 'System.Collections.Generic.List[object]'

    foreach ($file in $FileChunk) {
        $Context.OperationId++
        $Context.ProcessedItems++

        # [PROC-003.6] Display individual file progress
        if ($showProgress) {
            # TotalFiles is supplied by the caller; when it is missing or zero,
            # show 0% instead of failing with a divide-by-zero.
            $fileProgress = 0
            if ($Context.TotalFiles -gt 0) {
                $fileProgress = [math]::Round(($Context.ProcessedItems / $Context.TotalFiles) * 100, 0)
            }
            Write-Host (" [{0,3}%] File {1}/{2}: {3}" -f $fileProgress, $Context.ProcessedItems, $Context.TotalFiles, $file.Name) -ForegroundColor DarkCyan
        }

        # [PROC-003.7] Process individual file ($null marks a skipped file)
        $chunkResults.Add((Invoke-SingleFileProcessing -File $file -Context $Context))
    }

    return $chunkResults.ToArray()
}

# [PROC-004.0] Boundary-safe path remapping helper
<#
.SYNOPSIS
    Maps a path through the renamed-directory table, replacing only a leading
    directory prefix that ends on a path-segment boundary.

.PARAMETER Path
    Path to map.

.PARAMETER RenamedPaths
    Hashtable of old directory path -> new directory path.

.OUTPUTS
    The remapped path, or the unchanged input when no entry applies.
#>
function Resolve-RenamedPathPrefix {
    param(
        [string]$Path,
        [hashtable]$RenamedPaths
    )

    if ([string]::IsNullOrEmpty($Path) -or $null -eq $RenamedPaths -or $RenamedPaths.Count -eq 0) {
        return $Path
    }

    $separators = [char[]]@('\', '/')

    # Longest keys first so the most specific renamed directory wins.
    $orderedKeys = @($RenamedPaths.Keys | Sort-Object -Property Length -Descending)

    foreach ($oldPath in $orderedKeys) {
        $prefix = [string]$oldPath
        if ($prefix.Length -eq 0) { continue }
        if (-not $Path.StartsWith($prefix, [System.StringComparison]::Ordinal)) { continue }

        $remainder = $Path.Substring($prefix.Length)
        $prefixEndsOnSeparator = $separators -contains $prefix[$prefix.Length - 1]

        # Accept only whole-segment matches: 'C:\foo' must not capture 'C:\foobar\x'.
        if ($remainder.Length -eq 0 -or $prefixEndsOnSeparator -or ($separators -contains $remainder[0])) {
            # Swap the prefix only; later occurrences of the same text stay untouched.
            return ([string]$RenamedPaths[$oldPath]) + $remainder
        }
    }

    return $Path
}

# [PROC-004] Process a single file with comprehensive path handling
<#
.SYNOPSIS
    Computes current path, sanitized name and destination path for one file.

.PARAMETER File
    File as originally discovered.

.PARAMETER Context
    Processing context hashtable (RenamedPaths, GenerateReport, UseOutputRoot,
    Settings, RootFull and OutputRootFull are read).

.OUTPUTS
    Hashtable with OriginalPath, CurrentPath, NewPath, OriginalName, NewName,
    Directory, NeedsChange and FileInfo; or $null when an in-place operation
    no longer finds the file at its mapped path.
#>
function Invoke-SingleFileProcessing {
    param(
        [System.IO.FileInfo]$File,
        [hashtable]$Context
    )

    # [PROC-004.1] Update file path based on renamed directories
    $originalPath = $File.FullName
    $currentPath = Resolve-RenamedPathPrefix -Path $originalPath -RenamedPaths $Context.RenamedPaths
    if ($currentPath -cne $originalPath) {
        Write-Verbose "File path mapped: '$originalPath' -> '$currentPath'"
    }

    # [PROC-004.2] Skip files that cannot be found at the mapped path (in-place operations only)
    if (-not $Context.GenerateReport -and -not $Context.UseOutputRoot -and -not (Test-Path -LiteralPath $currentPath)) {
        if (-not $Context.Settings.Silent) {
            Write-Host " ⚠ Skipped: Parent directory was renamed" -ForegroundColor Yellow
        }
        return $null
    }

    # [PROC-004.3] Report and OutputRoot operations keep the original file info;
    # in-place operations re-read the item from its mapped location.
    $currentItem = $File
    if (-not $Context.GenerateReport -and -not $Context.UseOutputRoot) {
        $currentItem = Get-Item -LiteralPath $currentPath
    }
    $originalName = $currentItem.Name

    # [PROC-004.4] Update directory path for renamed directories
    $directory = Resolve-RenamedPathPrefix -Path $currentItem.DirectoryName -RenamedPaths $Context.RenamedPaths
    if ($directory -cne $currentItem.DirectoryName) {
        Write-Verbose "Directory path for file mapped: '$($currentItem.DirectoryName)' -> '$directory'"
    }

    # [PROC-004.5] Generate sanitized filename using FilenameHelpers
    $nameWithoutExt = [System.IO.Path]::GetFileNameWithoutExtension($originalName)
    $extension = [System.IO.Path]::GetExtension($originalName)
    $newName = Get-SanitizedName -Name $nameWithoutExt -Extension $extension -Settings $Context.Settings

    # [PROC-004.6] Calculate destination path based on OutputRoot configuration
    if ($Context.UseOutputRoot) {
        # Relative path from Root to the file's directory, mirrored under OutputRoot
        $rootUri = New-Object System.Uri(($Context.RootFull + '\'))
        $dirUri = New-Object System.Uri(($directory + '\'))
        $relUri = $rootUri.MakeRelativeUri($dirUri).ToString()
        $relPath = [System.Uri]::UnescapeDataString($relUri) -replace '/', '\'

        $destDir = $Context.OutputRootFull
        if (-not [string]::IsNullOrWhiteSpace($relPath)) {
            $destDir = Join-Path $Context.OutputRootFull $relPath
        }

        # [PROC-004.7] Create destination directory if needed (never in report mode)
        if (-not (Test-Path -LiteralPath $destDir) -and -not $Context.GenerateReport) {
            New-Item -ItemType Directory -Force -Path $destDir | Out-Null
        }

        $newPath = Join-Path $destDir $newName
    }
    else {
        $newPath = Join-Path $directory $newName
    }

    # [PROC-004.8] Return file processing result
    return @{
        OriginalPath = $File.FullName
        CurrentPath  = $currentPath
        NewPath      = $newPath
        OriginalName = $originalName
        NewName      = $newName
        Directory    = $directory
        NeedsChange  = ($newName -ne $originalName)
        FileInfo     = $currentItem
    }
}

# [PROC-005] Generate comprehensive processing statistics and performance metrics
<#
.SYNOPSIS
    Summarizes operation results by status and adds timing and memory figures.

.PARAMETER Operations
    Operation records exposing a Status value (hashtables or objects).
    Null entries count towards TotalOperations but towards no status.

.PARAMETER Context
    Processing context; Settings.UseParallel, Settings.ChunkSize and
    Settings.ThrottleLimit are read.

.PARAMETER Stopwatch
    Optional stopwatch; when supplied, ProcessingTime (seconds), ItemsPerSecond
    and AverageTimePerItem (milliseconds) are added.

.OUTPUTS
    Hashtable of statistics.
#>
function Get-ProcessingStatistics {
    param(
        [array]$Operations,
        [hashtable]$Context,
        [System.Diagnostics.Stopwatch]$Stopwatch = $null
    )

    # [PROC-005.1] Tally statuses in one pass. Counting explicitly avoids reading
    # .Count from a single filtered result, which for a lone hashtable is its
    # number of keys rather than 1.
    $statusCounts = @{
        Success          = 0
        Failed           = 0
        Skipped          = 0
        WhatIf           = 0
        AlreadyOptimized = 0
    }

    foreach ($operation in $Operations) {
        if ($null -eq $operation) { continue }

        $statusKey = [string]$operation.Status
        if ($statusCounts.ContainsKey($statusKey)) {
            $statusCounts[$statusKey]++
        }
    }

    $totalOperations = 0
    if ($null -ne $Operations) {
        $totalOperations = $Operations.Count
    }

    $stats = @{
        TotalOperations  = $totalOperations
        Successful       = $statusCounts.Success
        Failed           = $statusCounts.Failed
        Skipped          = $statusCounts.Skipped
        WhatIf           = $statusCounts.WhatIf
        AlreadyOptimized = $statusCounts.AlreadyOptimized
    }

    # [PROC-005.2] Add performance metrics if stopwatch provided
    if ($Stopwatch) {
        $totalSeconds = $Stopwatch.Elapsed.TotalSeconds

        $stats.ProcessingTime = [math]::Round($totalSeconds, 2)

        $stats.ItemsPerSecond = 0
        if ($totalSeconds -gt 0) {
            $stats.ItemsPerSecond = [math]::Round($stats.TotalOperations / $totalSeconds, 1)
        }

        $stats.AverageTimePerItem = 0
        if ($stats.TotalOperations -gt 0) {
            # milliseconds
            $stats.AverageTimePerItem = [math]::Round($totalSeconds / $stats.TotalOperations * 1000, 1)
        }
    }

    # [PROC-005.3] Add memory and processing context information
    $isParallel = [bool]$Context.Settings.UseParallel

    $stats.ProcessingMode = 'Sequential'
    $stats.ThrottleLimit = $null
    if ($isParallel) {
        $stats.ProcessingMode = 'Parallel'
        $stats.ThrottleLimit = $Context.Settings.ThrottleLimit
    }

    $stats.ChunkSize = $Context.Settings.ChunkSize
    $stats.MemoryUsageMB = [math]::Round([System.GC]::GetTotalMemory($false) / 1MB, 1)

    return $stats
}

# [PROC-006] Force garbage collection for memory management during long operations
<#
.SYNOPSIS
    Forces a garbage collection whenever ChunkIndex is a positive multiple of
    CleanupInterval.

.PARAMETER ChunkIndex
    Zero-based index of the chunk that was just processed.

.PARAMETER CleanupInterval
    Interval, in chunks, between collections. Values below 1 disable the cleanup.
#>
function Invoke-MemoryCleanup {
    param(
        [int]$ChunkIndex,
        [int]$CleanupInterval = 10
    )

    # A non-positive interval cannot be used as a modulus; treat it as "disabled".
    if ($CleanupInterval -lt 1) {
        Write-Verbose "Memory cleanup skipped: CleanupInterval is $CleanupInterval"
        return
    }

    # [PROC-006.1] Perform garbage collection at specified intervals
    if ($ChunkIndex -gt 0 -and ($ChunkIndex % $CleanupInterval) -eq 0) {
        Write-Verbose "Forcing garbage collection after chunk $($ChunkIndex + 1)"

        # Collect, let finalizers run, then collect what they released.
        [System.GC]::Collect()
        [System.GC]::WaitForPendingFinalizers()
        [System.GC]::Collect()

        # [PROC-006.2] Report memory usage after cleanup
        $memoryMB = [math]::Round([System.GC]::GetTotalMemory($false) / 1MB, 1)
        Write-Verbose "Memory usage after cleanup: ${memoryMB} MB"
    }
}