Types/OpenPackage/get_LanguagePercent.ps1

<#
.SYNOPSIS
    Gets the language percentages of a package
.DESCRIPTION
    Gets the language percentages present in the package.

    Each part of the package is classified by matching its URI against a table of
    regular expressions.  The first pattern that matches wins, and parts that match
    nothing are counted as 'Unknown'.

    The size of each part is added to the running total for its language, and each
    language total is then divided by the combined size of all parts.
.OUTPUTS
    An ordered dictionary, keyed by language name, sorted by size (largest first).
    Each value is that language's share of the total size, as a fraction of one.
.NOTES
    Definitions of what constitutes a language have been quite contentious.

    For the purposes of accurately identifying what lies within a package, we want a very broad definition.

    If you believe a language should be included, file an issue.

    If you believe any given file format is or is not a language, do not file an issue.
#>

# Pattern -> language table.
# Order matters: lookup stops at the first match, so more specific patterns
# (for example, the Office `/word/*.xml` parts) must appear before general ones (`\.xml$`).
# Matching uses `-match`, which is case-insensitive.
$languagePatterns = [Ordered]@{
    '\.3mf$'                       = '3MF'
    '\.astro$'                     = 'Astro'
    '\.c$'                         = 'C'
    '\.cast$'                      = 'Asciinema'
    '\.clixml$'                    = 'Clixml'
    '\.cjs$'                       = 'Common JavaScript'
    '\.cpp$'                       = 'C++'
    '\.cs$'                        = 'C#'
    '\.csv$'                       = 'Comma Separated Values'
    '\.csh$'                       = 'CShell'
    '\.css$'                       = 'Cascading Stylesheets'
    '(?>/word/.+?\.xml|\.docx?)$'  = 'Word'
    '\.dll$'                       = 'Binary'
    '\.exe$'                       = 'Binary'
    '\.gif$'                       = 'GIF'
    '\.go$'                        = 'Go Language'
    '\.h$'                         = 'C Header'
    '\.html?$'                     = 'Hypertext Markup Language'
    '\.java$'                      = 'Java'
    '\.jpe?g$'                     = 'Joint Pictures Expert Group'
    '\.json$'                      = 'JavaScript Object Notation'
    '\.jsonc$'                     = 'Commented JavaScript Object Notation'
    '\.jsonl$'                     = 'JavaScript Object Notation Lines'
    '\.js$'                        = 'Javascript'
    '\.jsx$'                       = 'JavaScript XML'
    '\.(?>md|mdx|markdown)$'       = 'Markdown'
    '\.midi?$'                     = 'MIDI'
    '\.(?>jsm|mjs)$'               = 'JavaScript Module'
    '\.mkv$'                       = 'Matroska Video'
    '\.mka$'                       = 'Matroska Audio'
    '\.mks$'                       = 'Matroska Subtitle'
    '\.mk3d$'                      = 'Matroska Stereoscopic Video'
    '\.mp3$'                       = 'MP3'
    '\.mp4$'                       = 'MP4'
    '\.nix$'                       = 'Nix'
    # The original pattern was `\.oog$`, which looks like a typo for `.ogg`.
    # Both spellings are accepted so nothing that matched before stops matching.
    '\.(?>ogg|oog)$'               = 'OGG'
    '\.pl$'                        = 'Perl'
    '\.png$'                       = 'Portable Network Graphics'
    '(?>/ppt/.+?\.xml|\.pptx?)$'   = 'PowerPoint'
    '\.psm?1$'                     = 'PowerShell'
    '\.psd1$'                      = 'PowerShell Data Language'
    '\.ps1xml$'                    = 'PowerShell Xml'
    '\.py$'                        = 'Python'
    '\.rs$'                        = 'Rust'
    '\.rss$'                       = 'RSS'
    '\.sh$'                        = 'BourneShell'
    '\.stl$'                       = 'STL'
    '\.svg$'                       = 'SVG'
    '\.tar$'                       = 'Tarfile'
    '(?>\.tar\.gz|\.tgz)$'         = 'GZippedTarfile'
    '\.tsx?$'                      = 'TypeScript'
    '\.tsv$'                       = 'Tab Separated Values'
    '\.toml$'                      = 'Tom''s Obvious Minimal Language'
    '\.xhtml$'                     = 'XHTML'
    '(?>/xl/.+?\.xml|\.xlsx?)$'    = 'Excel'
    '\.xsl$'                       = 'XSL'
    '\.xml$'                       = 'XML'
    '\.ya?ml$'                     = 'Yaml'
    '\.zip$'                       = 'Zip'
    '\.webm$'                      = 'Web Movie'
    '\.weba$'                      = 'Web Audio'
    '\.webp$'                      = 'Web Photo'
}

# Running size total for each language, and for the package as a whole.
$LanguagesByLength = [Ordered]@{}
$totalLength = 0

# Read the size table once, rather than once per part.
$fileSizes = $this.FileSize

foreach ($part in $this.GetParts()) {
    $partLength = $fileSizes[$part.Uri]
    $partUri    = "$($part.Uri)"

    # Classify the part.  Exactly one language is chosen per part:
    # the first matching pattern, or 'Unknown' when nothing matches.
    $recognizedLanguage = 'Unknown'
    foreach ($pattern in $languagePatterns.Keys) {
        if ($partUri -match $pattern) {
            $recognizedLanguage = $languagePatterns[$pattern]
            break
        }
    }

    if (-not $LanguagesByLength.Contains($recognizedLanguage)) {
        $LanguagesByLength[$recognizedLanguage] = 0
    }

    $LanguagesByLength[$recognizedLanguage] += $partLength
    $totalLength += $partLength
}

# Convert the totals to fractions of the whole, largest language first.
$SortedByLength = [Ordered]@{}

foreach ($keyValue in $LanguagesByLength.GetEnumerator() |
    Sort-Object Value -Descending
) {
    $SortedByLength[$keyValue.Key] =
        if ($totalLength) {
            $keyValue.Value / $totalLength
        } else {
            # Every part was empty: avoid dividing by zero.
            0
        }
}

return $SortedByLength