Types/OpenPackage/get_LanguagePercent.ps1
|
<#
.SYNOPSIS
    Gets the language percentages of a package
.DESCRIPTION
    Gets the language percentages present in the package.

    Every part returned by GetParts() is classified by the first pattern that matches its Uri.
    The part's size (looked up in FileSize by Uri) is added to that language's running total.
    Parts that match no pattern are counted as 'Unknown'.
.NOTES
    Definitions of what constitutes a language have been quite contentious.

    For the purposes of accurately identifying what lies within a package, we want a very broad definition.

    If you believe a language should be included, file an issue.

    If you believe any given file format is or is not a language, do not file an issue.
.OUTPUTS
    An ordered dictionary of language name to fraction of the total size (0 to 1),
    sorted from largest to smallest.
    If the total size is zero, every fraction is reported as 0.
#>

# Ordered table of Uri pattern -> language name.
# Order matters: the first matching pattern wins, so the Office-specific
# patterns (/word/, /ppt/, /xl/) must stay ahead of the generic '\.xml$'.
# Every pattern is anchored with `$` so that, for example,
# 'site.webmanifest' is not mistaken for a '.webm' file.
$languagePatterns = [Ordered]@{
    '\.3mf$'                        = '3MF'
    '\.astro$'                      = 'Astro'
    '\.c$'                          = 'C'
    '\.cast$'                       = 'Asciinema'
    '\.clixml$'                     = 'Clixml'
    '\.cjs$'                        = 'Common JavaScript'
    '\.cpp$'                        = 'C++'
    '\.cs$'                         = 'C#'
    '\.csv$'                        = 'Comma Separated Values'
    '\.csh$'                        = 'CShell'
    '\.css$'                        = 'Cascading Stylesheets'
    '(?>/word/.+?\.xml|\.docx?)$'   = 'Word'
    '\.dll$'                        = 'Binary'
    '\.exe$'                        = 'Binary'
    '\.gif$'                        = 'GIF'
    '\.go$'                         = 'Go Language'
    '\.h$'                          = 'C Header'
    '\.html?$'                      = 'Hypertext Markup Language'
    '\.java$'                       = 'Java'
    '\.jpe?g$'                      = 'Joint Pictures Expert Group'
    '\.json$'                       = 'JavaScript Object Notation'
    '\.jsonc$'                      = 'Commented JavaScript Object Notation'
    '\.jsonl$'                      = 'JavaScript Object Notation Lines'
    '\.js$'                         = 'Javascript'
    '\.jsx$'                        = 'JavaScript XML'
    '\.(?>md|mdx|markdown)$'        = 'Markdown'
    '\.midi?$'                      = 'MIDI'
    '\.(?>jsm|mjs)$'                = 'JavaScript Module'
    '\.mkv$'                        = 'Matroska Video'
    '\.mka$'                        = 'Matroska Audio'
    '\.mks$'                        = 'Matroska Subtitle'
    '\.mk3d$'                       = 'Matroska Stereoscopic Video'
    '\.mp3$'                        = 'MP3'
    '\.mp4$'                        = 'MP4'
    '\.nix$'                        = 'Nix'
    # NOTE(review): '.oog' / 'OOG' looks like a typo for '.ogg' / 'OGG' - confirm before changing.
    '\.oog$'                        = 'OOG'
    '\.pl$'                         = 'Perl'
    '\.png$'                        = 'Portable Network Graphics'
    '(?>/ppt/.+?\.xml|\.pptx?)$'    = 'PowerPoint'
    '\.psm?1$'                      = 'PowerShell'
    '\.psd1$'                       = 'PowerShell Data Language'
    '\.ps1xml$'                     = 'PowerShell Xml'
    '\.py$'                         = 'Python'
    '\.rs$'                         = 'Rust'
    '\.rss$'                        = 'RSS'
    '\.sh$'                         = 'BourneShell'
    '\.stl$'                        = 'STL'
    '\.svg$'                        = 'SVG'
    '\.tar$'                        = 'Tarfile'
    '(?>\.tar\.gz|\.tgz)$'          = 'GZippedTarfile'
    '\.tsx?$'                       = 'TypeScript'
    '\.tsv$'                        = 'Tab Separated Values'
    '\.toml$'                       = 'Tom''s Obvious Minimal Language'
    '\.xhtml$'                      = 'XHTML'
    '(?>/xl/.+?\.xml|\.xlsx?)$'     = 'Excel'
    '\.xsl$'                        = 'XSL'
    '\.xml$'                        = 'XML'
    '\.ya?ml$'                      = 'Yaml'
    '\.zip$'                        = 'Zip'
    '\.webm$'                       = 'Web Movie'
    '\.weba$'                       = 'Web Audio'
    '\.webp$'                       = 'Web Photo'
}

$LanguagesByLength = [Ordered]@{}
$totalLength = 0
$fileSizes = $this.FileSize

foreach ($part in $this.GetParts()) {
    $partLength = $fileSizes[$part.Uri]
    $partPath = "$($part.Uri)"

    # Classify the part: first matching pattern wins, 'Unknown' if none match.
    # (-match is case-insensitive, the same as the `switch -regex` it replaces.)
    $recognizedLanguage = 'Unknown'
    foreach ($languagePattern in $languagePatterns.GetEnumerator()) {
        if ($partPath -match $languagePattern.Key) {
            $recognizedLanguage = $languagePattern.Value
            break
        }
    }

    if (-not $LanguagesByLength.Contains($recognizedLanguage)) {
        $LanguagesByLength[$recognizedLanguage] = 0
    }
    $LanguagesByLength[$recognizedLanguage] += $partLength
    $totalLength += $partLength
}

# Convert the per-language totals into fractions, largest first.
$SortedByLength = [Ordered]@{}
foreach ($keyValue in $LanguagesByLength.GetEnumerator() | Sort-Object Value -Descending) {
    $SortedByLength[$keyValue.Key] =
        if ($totalLength) {
            $keyValue.Value / $totalLength
        } else {
            # Nothing has any size (e.g. every part is empty): avoid dividing by zero.
            0
        }
}

return $SortedByLength