# Private/ProcessingHelpers.ps1

<#
.SYNOPSIS
    ConvertVTTAssets ProcessingHelpers - Enterprise file processing and memory management
.DESCRIPTION
    Private module containing enterprise-scale processing functions for chunked file discovery,
    memory management, progress tracking, and performance optimization. Enables processing
    of 50,000+ files with controlled memory usage and comprehensive progress reporting.
.AUTHOR
    Andres Yuhnke, Claude (Anthropic)
.VERSION
    1.6.0
.DATE
    2025-08-24
.COPYRIGHT
    (c) 2025 Andres Yuhnke. MIT License.
.NOTES
    Private functions included:
    - Get-ChunkedFileDiscovery: Memory-efficient file discovery in configurable chunks
    - Initialize-ProcessingEnvironment: Setup and validation for enterprise operations
    - Invoke-FileProcessingChunk: Chunked processing with progress tracking
    - Invoke-SingleFileProcessing: Path mapping and destination calculation for one file
    - Get-ProcessingStatistics: Performance metrics and operation statistics
    - Invoke-MemoryCleanup: Periodic garbage collection between chunks
    
    Enterprise capabilities:
    - Chunked processing for 50,000+ files without memory exhaustion
    - Progress estimation and real-time feedback
    - Memory management with garbage collection
    - Performance metrics and throughput tracking
#>


# [PROC-001] Memory-efficient chunked file discovery for enterprise scale
function Get-ChunkedFileDiscovery {
    <#
    .SYNOPSIS
        Lists the directories and files under a root and computes chunking metadata.
    .DESCRIPTION
        Enumerates directories (sorted deepest-first) and files below -Root, optionally
        filters the files with a script block, and reports how many chunks of -ChunkSize
        files are needed to cover them.
    .PARAMETER Root
        Literal path of the folder to scan.
    .PARAMETER Recurse
        When $true (default) sub-folders are scanned as well.
    .PARAMETER ChunkSize
        Number of files per chunk. Must be 1 or greater.
    .PARAMETER ExtensionFilter
        Optional script block handed to Where-Object to filter the discovered files.
    .PARAMETER EnableProgressEstimation
        When set, TotalItemsEstimate holds the number of directories plus unfiltered files;
        otherwise it is -1.
    .OUTPUTS
        Hashtable with keys Directories, Files, TotalFiles, TotalDirectories, ChunkSize,
        TotalFileChunks and TotalItemsEstimate. Directories and Files are always arrays
        (possibly empty).
    #>
    param(
        [string]$Root,
        [bool]$Recurse = $true,
        [int]$ChunkSize = 1000,
        [scriptblock]$ExtensionFilter = $null,
        [switch]$EnableProgressEstimation
    )

    # A chunk size below 1 would divide by zero or produce a negative chunk count.
    if ($ChunkSize -lt 1) {
        throw "ChunkSize must be 1 or greater (received $ChunkSize)."
    }

    Write-Verbose "Starting chunked file discovery in: $Root"

    # [PROC-001.1] Discover directories first (must be processed sequentially).
    # Deepest paths sort first; depth is the number of path segments, counted with
    # both separator characters so the ordering does not depend on a literal '\'.
    Write-Verbose "Discovering directories..."
    $separators = [char[]]@(
        [System.IO.Path]::DirectorySeparatorChar,
        [System.IO.Path]::AltDirectorySeparatorChar
    )
    $allDirectories = @(
        Get-ChildItem -LiteralPath $Root -Directory -Recurse:$Recurse |
            Sort-Object { $_.FullName.Split($separators).Count } -Descending
    )

    # [PROC-001.2] Discover files; @() keeps the result an array for 0 or 1 matches
    Write-Verbose "Discovering files..."
    $discoveredFiles = @(Get-ChildItem -LiteralPath $Root -File -Recurse:$Recurse)
    $unfilteredFileCount = $discoveredFiles.Count

    # [PROC-001.3] Apply extension filtering if provided
    $allFiles = $discoveredFiles
    if ($ExtensionFilter) {
        $allFiles = @($discoveredFiles | Where-Object $ExtensionFilter)
    }

    # [PROC-001.4] Item estimate for progress tracking, derived from the listings
    # above instead of walking the tree a third time.
    $totalItemsEstimate = -1
    if ($EnableProgressEstimation) {
        Write-Verbose "Estimating total items for progress tracking..."
        $totalItemsEstimate = $allDirectories.Count + $unfilteredFileCount
    }

    # [PROC-001.5] Calculate chunk information
    $totalFiles = $allFiles.Count
    $totalFileChunks = [math]::Ceiling($totalFiles / $ChunkSize)

    Write-Verbose "Discovered $($allDirectories.Count) directories and $totalFiles files"
    Write-Verbose "Files will be processed in $totalFileChunks chunks of $ChunkSize"

    # [PROC-001.6] Return comprehensive discovery results
    return @{
        Directories = $allDirectories
        Files = $allFiles
        TotalFiles = $totalFiles
        TotalDirectories = $allDirectories.Count
        ChunkSize = $ChunkSize
        TotalFileChunks = $totalFileChunks
        TotalItemsEstimate = $totalItemsEstimate
    }
}

# [PROC-002] Initialize processing environment with validation and setup
function Initialize-ProcessingEnvironment {
    <#
    .SYNOPSIS
        Validates the root path and builds the hashtable shared by the processing functions.
    .PARAMETER Root
        Literal path that must already exist; otherwise the function throws.
    .PARAMETER OutputRoot
        Optional destination root. When it does not exist it is created, except in
        -GenerateReport mode where nothing is written to disk.
    .PARAMETER Settings
        Settings hashtable stored unchanged in the returned context.
    .PARAMETER GenerateReport
        Report-only mode; suppresses creation of the OutputRoot directory.
    .OUTPUTS
        Hashtable with keys Root, OutputRoot, OutputRootFull, RootFull, UseOutputRoot,
        Settings, GenerateReport, RenameOperations, Errors, Skipped, RenamedPaths,
        OperationId, AnalysisItems and ProcessedItems. OutputRootFull and RootFull are
        only populated when OutputRoot is supplied.
    #>
    param(
        [string]$Root,
        [string]$OutputRoot = $null,
        [hashtable]$Settings,
        [switch]$GenerateReport
    )

    # [PROC-002.1] Validate root path exists
    if (-not (Test-Path -LiteralPath $Root)) {
        throw "Path not found: $Root"
    }

    # [PROC-002.2] Handle OutputRoot configuration if specified
    $useOutputRoot = -not [string]::IsNullOrWhiteSpace($OutputRoot)
    $outputRootFull = $null
    $rootFull = $null

    if ($useOutputRoot) {
        if (-not (Test-Path -LiteralPath $OutputRoot) -and -not $GenerateReport) {
            New-Item -ItemType Directory -Force -Path $OutputRoot | Out-Null
        }

        if (Test-Path -LiteralPath $OutputRoot) {
            $outputRootFull = (Resolve-Path -LiteralPath $OutputRoot).Path
        } else {
            # Report mode leaves a missing OutputRoot uncreated; Resolve-Path would
            # emit an error and yield $null here, so compute the full path without
            # requiring the directory to exist.
            $outputRootFull = $ExecutionContext.SessionState.Path.GetUnresolvedProviderPathFromPSPath($OutputRoot)
        }

        $rootFull = (Resolve-Path -LiteralPath $Root).Path
    }

    # [PROC-002.3] Initialize processing collections
    $processingContext = @{
        Root = $Root
        OutputRoot = $OutputRoot
        OutputRootFull = $outputRootFull
        RootFull = $rootFull
        UseOutputRoot = $useOutputRoot
        Settings = $Settings
        GenerateReport = $GenerateReport
        RenameOperations = @()
        Errors = @()
        Skipped = @()
        RenamedPaths = @{}
        OperationId = 0
        AnalysisItems = @()
        ProcessedItems = 0
    }

    return $processingContext
}

# [PROC-003] Process a single chunk of files with comprehensive tracking
function Invoke-FileProcessingChunk {
    <#
    .SYNOPSIS
        Processes one chunk of files, either in parallel or one by one.
    .PARAMETER FileChunk
        The files that make up this chunk.
    .PARAMETER Context
        Processing context hashtable. Reads Settings, GenerateReport, RenamedPaths,
        OutputRoot, RootFull and TotalFiles; updates OperationId and, in sequential
        mode, ProcessedItems.
    .PARAMETER ChunkIndex
        Zero-based index of this chunk (displayed as ChunkIndex + 1).
    .PARAMETER TotalChunks
        Total number of chunks, used for the percentage display.
    .PARAMETER UseParallel
        Delegates the chunk to Invoke-FileNameOptimizationParallel (defined elsewhere)
        instead of looping over Invoke-SingleFileProcessing.
    .OUTPUTS
        The results produced for the chunk.
    #>
    param(
        [System.IO.FileInfo[]]$FileChunk,
        [hashtable]$Context,
        [int]$ChunkIndex,
        [int]$TotalChunks,
        [switch]$UseParallel
    )

    Write-Verbose "Processing chunk $($ChunkIndex + 1)/$TotalChunks with $($FileChunk.Count) files"

    $showProgress = -not $Context.Settings.Silent -and -not $Context.GenerateReport

    # [PROC-003.1] Display chunk progress if not silent
    if ($showProgress) {
        # Guard the division: a zero TotalChunks must not abort processing.
        $chunkProgress = 0
        if ($TotalChunks -gt 0) {
            $chunkProgress = [math]::Round((($ChunkIndex + 1) / $TotalChunks) * 100, 0)
        }
        Write-Host "Processing file chunk $($ChunkIndex + 1)/$TotalChunks ($chunkProgress%)" -ForegroundColor Cyan
    }

    # [PROC-003.2] Choose processing method based on parallel preference
    if ($UseParallel) {
        # [PROC-003.3] Settings forwarded to the parallel helper
        $parallelSettings = @{
            RemoveMetadata = $Context.Settings.RemoveMetadata
            SpaceReplacement = $Context.Settings.SpaceReplacement
            LowercaseExtensions = $Context.Settings.LowercaseExtensions
            PreserveCase = $Context.Settings.PreserveCase
            ExpandAmpersand = $Context.Settings.ExpandAmpersand
            Force = $Context.Settings.Force
            ThrottleLimit = $Context.Settings.ThrottleLimit
            VerbosePreference = $VerbosePreference
            WhatIfPreference = $WhatIfPreference
        }

        # [PROC-003.4] Execute parallel processing. The [ref] wraps a copy of the
        # counter, so the value is written back to the context afterwards.
        $operationIdRef = [ref]$Context.OperationId
        $parallelResults = Invoke-FileNameOptimizationParallel -Files $FileChunk -Settings $parallelSettings -RenamedPaths $Context.RenamedPaths -OperationId $operationIdRef -OutputRoot $Context.OutputRoot -RootFull $Context.RootFull

        $Context.OperationId = $operationIdRef.Value
        return $parallelResults
    }

    # [PROC-003.5] Sequential processing with detailed progress tracking.
    # A List avoids re-allocating the whole array on every append.
    $chunkResults = New-Object 'System.Collections.Generic.List[object]'

    foreach ($file in $FileChunk) {
        $Context.OperationId++
        $Context.ProcessedItems++

        # [PROC-003.6] Display individual file progress
        if ($showProgress) {
            # TotalFiles is supplied by the caller; when it is missing or zero the
            # percentage is shown as 0 instead of raising a divide-by-zero error.
            $fileProgress = 0
            if ($Context.TotalFiles -gt 0) {
                $fileProgress = [math]::Round(($Context.ProcessedItems / $Context.TotalFiles) * 100, 0)
            }
            Write-Host (" [{0,3}%] File {1}/{2}: {3}" -f $fileProgress, $Context.ProcessedItems, $Context.TotalFiles, $file.Name) -ForegroundColor DarkCyan
        }

        # [PROC-003.7] Process individual file. @() mirrors the previous '+='
        # semantics: a $null result (skipped file) is kept as a $null entry.
        $result = Invoke-SingleFileProcessing -File $file -Context $Context
        foreach ($entry in @($result)) {
            $chunkResults.Add($entry)
        }
    }

    return $chunkResults.ToArray()
}

# [PROC-004] Process a single file with comprehensive path handling
function Invoke-SingleFileProcessing {
    <#
    .SYNOPSIS
        Works out the current location, sanitized name and destination path for one file.
    .PARAMETER File
        The file as originally discovered.
    .PARAMETER Context
        Processing context hashtable. Reads RenamedPaths, GenerateReport, UseOutputRoot,
        Settings, RootFull and OutputRootFull.
    .OUTPUTS
        Hashtable with keys OriginalPath, CurrentPath, NewPath, OriginalName, NewName,
        Directory, NeedsChange and FileInfo; or $null when, for an in-place run, the
        file no longer exists at its mapped path.
    .NOTES
        The new name comes from Get-SanitizedName, which is defined outside this function.
        In OutputRoot mode the destination directory is created unless GenerateReport is set.
    #>
    param(
        [System.IO.FileInfo]$File,
        [hashtable]$Context
    )

    # Maps $Path through the renamed-directory table. Longest old path wins. A key
    # only matches as a whole leading directory ('C:\foo' must not match 'C:\foobar'),
    # and only that leading prefix is rewritten. Returns $null when nothing matches.
    function Get-RenamedPathMapping {
        param(
            [string]$Path,
            [hashtable]$RenamedPaths
        )

        if ([string]::IsNullOrEmpty($Path) -or $null -eq $RenamedPaths) {
            return $null
        }

        $separators = [char[]]@('\', '/')

        foreach ($oldPath in ($RenamedPaths.Keys | Sort-Object -Property Length -Descending)) {
            if ([string]::IsNullOrEmpty($oldPath)) { continue }
            if (-not $Path.StartsWith($oldPath, [System.StringComparison]::Ordinal)) { continue }

            $endsOnBoundary = ($Path.Length -eq $oldPath.Length) -or
                ($separators -contains $oldPath[$oldPath.Length - 1]) -or
                ($separators -contains $Path[$oldPath.Length])

            if ($endsOnBoundary) {
                return ([string]$RenamedPaths[$oldPath]) + $Path.Substring($oldPath.Length)
            }
        }

        return $null
    }

    # [PROC-004.1] Update file path based on renamed directories
    $originalPath = $File.FullName
    $currentPath = $originalPath

    $mappedPath = Get-RenamedPathMapping -Path $currentPath -RenamedPaths $Context.RenamedPaths
    if ($null -ne $mappedPath) {
        $currentPath = $mappedPath
        Write-Verbose "File path mapped: '$originalPath' -> '$currentPath'"
    }

    # [PROC-004.2] Skip files whose parent directories were renamed (for in-place operations)
    if (-not $Context.GenerateReport -and -not $Context.UseOutputRoot -and -not (Test-Path -LiteralPath $currentPath)) {
        if (-not $Context.Settings.Silent) {
            Write-Host " ⚠ Skipped: Parent directory was renamed" -ForegroundColor Yellow
        }
        return $null
    }

    # [PROC-004.3] Get current file item and directory information.
    # Report and OutputRoot runs keep the original file info; in-place runs re-read
    # the item from its mapped location.
    if ($Context.GenerateReport -or $Context.UseOutputRoot) {
        $currentItem = $File
    } else {
        $currentItem = Get-Item -LiteralPath $currentPath
    }

    $originalName = $currentItem.Name

    # [PROC-004.4] Update directory path for renamed directories
    $directory = $currentItem.DirectoryName
    $mappedDirectory = Get-RenamedPathMapping -Path $directory -RenamedPaths $Context.RenamedPaths
    if ($null -ne $mappedDirectory) {
        $directory = $mappedDirectory
        Write-Verbose "Directory path for file mapped: '$($currentItem.DirectoryName)' -> '$directory'"
    }

    # [PROC-004.5] Generate sanitized filename using FilenameHelpers
    $nameWithoutExt = [System.IO.Path]::GetFileNameWithoutExtension($originalName)
    $extension = [System.IO.Path]::GetExtension($originalName)

    $newName = Get-SanitizedName -Name $nameWithoutExt -Extension $extension -Settings $Context.Settings

    # [PROC-004.6] Calculate destination path based on OutputRoot configuration
    if ($Context.UseOutputRoot) {
        # Calculate relative path from Root to file's directory
        $rootUri = New-Object System.Uri(($Context.RootFull + '\'))
        $dirUri = New-Object System.Uri(($directory + '\'))
        $relUri = $rootUri.MakeRelativeUri($dirUri).ToString()
        $relPath = [System.Uri]::UnescapeDataString($relUri) -replace '/', '\'

        $destDir = if ([string]::IsNullOrWhiteSpace($relPath)) {
            $Context.OutputRootFull
        } else {
            Join-Path $Context.OutputRootFull $relPath
        }

        # [PROC-004.7] Create destination directory if needed
        if (-not (Test-Path -LiteralPath $destDir)) {
            if (-not $Context.GenerateReport) {
                New-Item -ItemType Directory -Force -Path $destDir | Out-Null
            }
        }

        $newPath = Join-Path $destDir $newName
    } else {
        $newPath = Join-Path $directory $newName
    }

    # [PROC-004.8] Return file processing result
    return @{
        OriginalPath = $File.FullName
        CurrentPath = $currentPath
        NewPath = $newPath
        OriginalName = $originalName
        NewName = $newName
        Directory = $directory
        NeedsChange = ($newName -ne $originalName)
        FileInfo = $currentItem
    }
}

# [PROC-005] Generate comprehensive processing statistics and performance metrics
function Get-ProcessingStatistics {
    <#
    .SYNOPSIS
        Summarizes operation outcomes and, optionally, timing for a processing run.
    .PARAMETER Operations
        Operation records; each is expected to expose a Status value of "Success",
        "Failed", "Skipped", "WhatIf" or "AlreadyOptimized". Other values are only
        counted in TotalOperations.
    .PARAMETER Context
        Processing context; Settings.UseParallel, Settings.ChunkSize and
        Settings.ThrottleLimit are read from it.
    .PARAMETER Stopwatch
        Optional stopwatch. When supplied, ProcessingTime (seconds), ItemsPerSecond and
        AverageTimePerItem (milliseconds) are added.
    .OUTPUTS
        Hashtable with keys TotalOperations, Successful, Failed, Skipped, WhatIf,
        AlreadyOptimized, ProcessingMode, ChunkSize, ThrottleLimit, MemoryUsageMB and,
        with a stopwatch, the three timing keys.
    #>
    param(
        [array]$Operations,
        [hashtable]$Context,
        [System.Diagnostics.Stopwatch]$Stopwatch = $null
    )

    # [PROC-005.1] Calculate basic operation statistics in a single pass.
    # Counting explicitly avoids reading .Count off a lone Where-Object result, which
    # for a single hashtable record is its number of keys rather than 1.
    # (@{} keys compare case-insensitively, like the -eq operator.)
    $statusCounts = @{
        Success = 0
        Failed = 0
        Skipped = 0
        WhatIf = 0
        AlreadyOptimized = 0
    }
    $totalOperations = 0

    if ($null -ne $Operations) {
        $totalOperations = $Operations.Count
        foreach ($operation in $Operations) {
            if ($null -eq $operation) { continue }

            $status = $operation.Status
            if ($null -eq $status) { continue }

            $statusKey = [string]$status
            if ($statusCounts.ContainsKey($statusKey)) {
                $statusCounts[$statusKey]++
            }
        }
    }

    $stats = @{
        TotalOperations = $totalOperations
        Successful = $statusCounts['Success']
        Failed = $statusCounts['Failed']
        Skipped = $statusCounts['Skipped']
        WhatIf = $statusCounts['WhatIf']
        AlreadyOptimized = $statusCounts['AlreadyOptimized']
    }

    # [PROC-005.2] Add performance metrics if stopwatch provided
    if ($Stopwatch) {
        $totalSeconds = $Stopwatch.Elapsed.TotalSeconds
        $stats.ProcessingTime = [math]::Round($totalSeconds, 2)
        $stats.ItemsPerSecond = if ($totalSeconds -gt 0) {
            [math]::Round($stats.TotalOperations / $totalSeconds, 1)
        } else { 0 }
        $stats.AverageTimePerItem = if ($stats.TotalOperations -gt 0) {
            [math]::Round($totalSeconds / $stats.TotalOperations * 1000, 1)  # milliseconds
        } else { 0 }
    }

    # [PROC-005.3] Add memory and processing context information
    $stats.ProcessingMode = if ($Context.Settings.UseParallel) { "Parallel" } else { "Sequential" }
    $stats.ChunkSize = $Context.Settings.ChunkSize
    $stats.ThrottleLimit = if ($Context.Settings.UseParallel) { $Context.Settings.ThrottleLimit } else { $null }
    $stats.MemoryUsageMB = [math]::Round([System.GC]::GetTotalMemory($false) / 1MB, 1)

    return $stats
}

# [PROC-006] Force garbage collection for memory management during long operations
function Invoke-MemoryCleanup {
    <#
    .SYNOPSIS
        Runs a full garbage collection every -CleanupInterval chunks.
    .PARAMETER ChunkIndex
        Zero-based index of the chunk that has just been processed. Index 0 never
        triggers a collection.
    .PARAMETER CleanupInterval
        Collect when ChunkIndex is a positive multiple of this value (default 10).
        Zero or a negative value disables the cleanup instead of raising a
        divide-by-zero error.
    .OUTPUTS
        None. Memory usage after the collection is written to the verbose stream.
    #>
    param(
        [int]$ChunkIndex,
        [int]$CleanupInterval = 10
    )

    # A non-positive interval cannot be used as a modulus; treat it as "disabled".
    if ($CleanupInterval -le 0) {
        Write-Verbose "Memory cleanup disabled (CleanupInterval = $CleanupInterval)"
        return
    }

    # [PROC-006.1] Perform garbage collection at specified intervals
    if ($ChunkIndex -le 0 -or ($ChunkIndex % $CleanupInterval) -ne 0) {
        return
    }

    Write-Verbose "Forcing garbage collection after chunk $($ChunkIndex + 1)"
    # Collect, let pending finalizers run, then collect again to reclaim what they released.
    [System.GC]::Collect()
    [System.GC]::WaitForPendingFinalizers()
    [System.GC]::Collect()

    # [PROC-006.2] Report memory usage after cleanup
    $memoryMB = [math]::Round([System.GC]::GetTotalMemory($false) / 1MB, 1)
    Write-Verbose "Memory usage after cleanup: ${memoryMB} MB"
}