# measure_group.ps1 - compares Group-Object and HashSet approaches to de-duplicating packages by Id

# Benchmark dimensions: how many search terms are simulated and how many
# package records each simulated search returns.
$searchQueries = 1..5 # Simulate 5 search terms
$packagesPerQuery = 1..1000 # Simulate 1000 packages per query

Write-Host "Benchmarking deduplication logic..."

# Build the input data set for the benchmarks below.
# For every query, one record is produced per entry in $packagesPerQuery. The Id
# uses the counter modulo 500, so with 1..1000 each Id occurs twice per query
# (i.e. half of the records are duplicates), while Name stays unique per record.
#
# The loop output is collected directly into an array instead of appending with
# '+=' on every iteration: '+=' on an array allocates a new array and copies all
# existing elements each time. @( ) guarantees an array even if the ranges above
# are changed to produce zero or one record.
$allPackages = @(
    foreach ($query in $searchQueries) {
        foreach ($n in $packagesPerQuery) {
            [PSCustomObject]@{ Id = "Pkg.$query.$($n % 500)"; Name = "Package $query $n" }
        }
    }
)

# Approach 1: de-duplicate with the pipeline. Group-Object buckets the records
# by Id and the first record of each group is kept.
# The pipeline is wrapped in @( ) so $result1 is always an array: without it a
# single surviving record would be assigned as a bare PSCustomObject, and the
# .Count reported below would not be reliable.
$sw = [System.Diagnostics.Stopwatch]::StartNew()
$result1 = @($allPackages | Group-Object Id | ForEach-Object { $_.Group[0] })
$sw.Stop()
Write-Host "Inefficient (Group-Object) took: $($sw.ElapsedMilliseconds) ms. Count: $($result1.Count)"

# Approach 2: de-duplicate in a single pass. A case-insensitive HashSet tracks
# the Ids already encountered; HashSet.Add returns $false for an Id that is
# already present, so only the first record carrying each Id is kept.
# The collections are allocated after the stopwatch starts, so their setup cost
# is part of the measured time.
$sw = [System.Diagnostics.Stopwatch]::new()
$sw.Start()
$idComparer = [System.StringComparer]::OrdinalIgnoreCase
$knownIds = [System.Collections.Generic.HashSet[string]]::new($idComparer)
$result2 = [System.Collections.Generic.List[PSCustomObject]]::new()
foreach ($candidate in $allPackages) {
    # Skip records whose Id has been seen before.
    if (-not $knownIds.Add($candidate.Id)) { continue }
    $result2.Add($candidate)
}
$sw.Stop()
Write-Host "Optimized (HashSet) took: $($sw.ElapsedMilliseconds) ms. Count: $($result2.Count)"