tests/Hygiene/Analyze-FileSize.ps1
|
<#
.SYNOPSIS
    Analyzes file sizes and detects large files (blobs).

.DESCRIPTION
    Provides detailed metrics about file sizes including total size, largest
    files, size distribution, and potential blob detection.

    Every file below ProjectRoot is inspected (hidden files included), except
    files located under a ".git" directory. The metrics are printed to the
    host, exported to "file_size_results.json" next to this script, and
    returned as a single object.

.PARAMETER ProjectRoot
    Directory to analyze. Defaults to the directory two levels above the
    folder that contains this script.

.PARAMETER BlobThresholdKB
    Files strictly larger than this many kilobytes are reported as blobs.
    Defaults to 100.

.EXAMPLE
    .\Analyze-FileSize.ps1

.EXAMPLE
    .\Analyze-FileSize.ps1 -ProjectRoot C:\src\MyProject -BlobThresholdKB 512
#>
[CmdletBinding()]
param(
    [string]$ProjectRoot = (Split-Path (Split-Path $PSScriptRoot -Parent) -Parent),
    [int]$BlobThresholdKB = 100
)

Write-Host "`n=== File Size Analysis ===" -ForegroundColor Cyan
Write-Host "Analyzing project: $ProjectRoot`n"

# Resolve the root to an absolute provider path once. FileInfo.FullName is
# always absolute, so a relative root (for example ".") must be resolved before
# it can be stripped from the file paths.
$resolvedRoot = $ProjectRoot
if ($ProjectRoot -and (Test-Path -LiteralPath $ProjectRoot)) {
    $resolvedRoot = (Resolve-Path -LiteralPath $ProjectRoot).ProviderPath
}

# Returns $FullPath relative to $Root. Only a leading root prefix is removed
# (never occurrences in the middle of the path), and both '\' and '/' are
# trimmed so the result is the same on Windows and on Unix-like systems.
function Get-ProjectRelativePath {
    param(
        [string]$FullPath,
        [string]$Root
    )

    if ($Root -and $FullPath.StartsWith($Root, [System.StringComparison]::OrdinalIgnoreCase)) {
        return $FullPath.Substring($Root.Length).TrimStart([char[]]'\/')
    }

    # The file is not below the root as spelled; report the full path unchanged.
    return $FullPath
}

# Get all files, skipping anything inside a .git directory.
# @(...) guarantees an array even for zero or one match, so .Count is reliable.
# -LiteralPath keeps '[' and ']' in the root from being treated as wildcards.
$allFiles = @(
    Get-ChildItem -LiteralPath $ProjectRoot -Recurse -File -Force -ErrorAction SilentlyContinue |
        Where-Object { $_.FullName -notmatch '[\\/]\.git[\\/]' }
)

# Calculate total size. Measure-Object yields no sum for empty input.
$totalSizeBytes = ($allFiles | Measure-Object -Property Length -Sum).Sum
if ($null -eq $totalSizeBytes) {
    $totalSizeBytes = 0
}
$totalSizeKB = [Math]::Round($totalSizeBytes / 1KB, 2)
$totalSizeMB = [Math]::Round($totalSizeBytes / 1MB, 2)

# Get the ten largest files.
$largestFiles = @(
    $allFiles |
        Sort-Object Length -Descending |
        Select-Object -First 10 |
        ForEach-Object {
            [PSCustomObject]@{
                Path      = Get-ProjectRelativePath -FullPath $_.FullName -Root $resolvedRoot
                SizeBytes = $_.Length
                SizeKB    = [Math]::Round($_.Length / 1KB, 2)
                SizeMB    = [Math]::Round($_.Length / 1MB, 2)
            }
        }
)

# Detect blobs (files > threshold).
# The array wrapper matters here: a lone PSCustomObject has no Count property on
# Windows PowerShell 5.1, which would make a single blob look like "no blobs".
$blobLimitBytes = $BlobThresholdKB * 1KB
$blobFiles = @(
    $allFiles |
        Where-Object { $_.Length -gt $blobLimitBytes } |
        ForEach-Object {
            [PSCustomObject]@{
                Path   = Get-ProjectRelativePath -FullPath $_.FullName -Root $resolvedRoot
                SizeKB = [Math]::Round($_.Length / 1KB, 2)
            }
        }
)

# Average size by extension, largest average first.
$avgSizeByExtension = @(
    $allFiles |
        Group-Object { [System.IO.Path]::GetExtension($_.Name) } |
        ForEach-Object {
            $ext = if ($_.Name) { $_.Name } else { "(no extension)" }
            $avgSize = ($_.Group | Measure-Object -Property Length -Average).Average
            [PSCustomObject]@{
                Extension    = $ext
                Count        = $_.Count
                AvgSizeBytes = [Math]::Round($avgSize, 0)
                AvgSizeKB    = [Math]::Round($avgSize / 1KB, 2)
            }
        } |
        Sort-Object AvgSizeKB -Descending
)

# Build result
$result = [PSCustomObject]@{
    ProjectRoot            = $ProjectRoot
    TotalFiles             = $allFiles.Count
    TotalSizeBytes         = $totalSizeBytes
    TotalSizeKB            = $totalSizeKB
    TotalSizeMB            = $totalSizeMB
    LargestFiles           = $largestFiles
    BlobFiles              = $blobFiles
    BlobThresholdKB        = $BlobThresholdKB
    AverageSizeByExtension = $avgSizeByExtension
}

# Average per file; guarded so an empty project does not divide by zero.
$averageFileSizeKB = 0
if ($allFiles.Count -gt 0) {
    $averageFileSizeKB = [Math]::Round($totalSizeBytes / $allFiles.Count / 1KB, 2)
}

# Display results
# NOTE: the Format-Table calls below write to the success stream, so a caller
# that captures this script's output receives those formatting records ahead of
# the result object. Kept as-is to preserve the existing output contract.
Write-Host "=== Size Metrics ===" -ForegroundColor Yellow
Write-Host "Total Files: $($allFiles.Count)"
Write-Host "Total Size: $totalSizeKB KB ($totalSizeMB MB)"
Write-Host "Average File Size: $averageFileSizeKB KB"

Write-Host "`n=== Top 10 Largest Files ===" -ForegroundColor Yellow
$largestFiles | Format-Table Path, SizeKB, SizeMB -AutoSize

if ($blobFiles.Count -gt 0) {
    Write-Host "`n⚠️ Blob Files Detected (>$BlobThresholdKB KB)" -ForegroundColor Red
    Write-Host "Found $($blobFiles.Count) file(s) exceeding threshold:`n"
    $blobFiles | Format-Table Path, SizeKB -AutoSize
} else {
    Write-Host "`n✅ No blob files detected (all files <$BlobThresholdKB KB)" -ForegroundColor Green
}

Write-Host "`n=== Average Size by File Type ===" -ForegroundColor Yellow
$avgSizeByExtension | Select-Object -First 10 | Format-Table -AutoSize

# Export to JSON. The encoding is pinned to UTF-8 because the Out-File default
# differs between Windows PowerShell (UTF-16) and PowerShell 6+ (UTF-8).
$outputPath = Join-Path $PSScriptRoot "file_size_results.json"
$result | ConvertTo-Json -Depth 5 | Out-File -LiteralPath $outputPath -Encoding utf8
Write-Host "`nResults exported to: $outputPath" -ForegroundColor Green

return $result