Functions/GenXdev.Helpers/Get-SpeechToText.cs

// ################################################################################
// Part of PowerShell module : GenXdev.Helpers
// Original cmdlet filename : Get-SpeechToText.cs
// Original author : René Vaessen / GenXdev
// Version : 1.304.2025
// ################################################################################
// Copyright (c) René Vaessen / GenXdev
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// ################################################################################
 
 
 
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.IO;
using System.Management;
using System.Management.Automation;
using System.Threading;
using System.Threading.Tasks;
using Whisper.net;
using Whisper.net.Ggml;
 
namespace GenXdev.Helpers
{
 
    /// <summary>
    /// <para type="synopsis">
    /// Converts audio files to text using OpenAI's Whisper speech recognition model.
    /// </para>
    ///
    /// <para type="description">
    /// This cmdlet processes audio files and converts speech to text using the Whisper.NET
    /// library, which implements OpenAI's Whisper automatic speech recognition (ASR) system.
    /// It supports multiple languages, translation capabilities, and various transcription
    /// quality settings.
    /// </para>
    ///
    /// <para type="description">
    /// PARAMETERS
    /// </para>
    ///
    /// <para type="description">
    /// -Input &lt;Object&gt;<br/>
    /// Audio file path, FileInfo object, or any audio format supported by Whisper.<br/>
    /// - <b>Aliases</b>: WaveFile<br/>
    /// - <b>Position</b>: 0<br/>
    /// - <b>Mandatory</b>: Yes<br/>
    /// - <b>Pipeline input</b>: Yes<br/>
    /// </para>
    ///
    /// <para type="description">
    /// -ModelFileDirectoryPath &lt;String&gt;<br/>
    /// Directory path where Whisper model files are stored or will be downloaded.<br/>
    /// - <b>Default</b>: $ENV:LOCALAPPDATA\GenXdev.PowerShell<br/>
    /// </para>
    ///
    /// <para type="description">
    /// -ModelType &lt;GgmlType&gt;<br/>
    /// Specifies which Whisper model to use for transcription.<br/>
    /// - <b>Default</b>: LargeV3Turbo<br/>
    /// </para>
    ///
    /// <para type="description">
    /// -LanguageIn &lt;String&gt;<br/>
    /// Input language code for speech recognition (e.g., 'en', 'es', 'fr').<br/>
    /// - <b>Default</b>: "auto" (automatic detection)<br/>
    /// </para>
    ///
    /// <para type="description">
    /// -CpuThreads &lt;Int32&gt;<br/>
    /// Number of CPU threads to use for processing.<br/>
    /// - <b>Default</b>: Physical core count (auto-detected)<br/>
    /// </para>
    ///
    /// <para type="description">
    /// -Temperature &lt;Single&gt;<br/>
    /// Temperature for speech detection (0.0-1.0). Lower values produce more
    /// consistent results.<br/>
    /// - <b>Range</b>: 0.0 to 1.0<br/>
    /// - <b>Default</b>: 0.0<br/>
    /// </para>
    ///
    /// <para type="description">
    /// -TemperatureInc &lt;Single&gt;<br/>
    /// Temperature increment for fallback attempts.<br/>
    /// - <b>Range</b>: 0.0 to 1.0<br/>
    /// </para>
    ///
    /// <para type="description">
    /// -NoSpeechThreshold &lt;Single&gt;<br/>
    /// Threshold for detecting silence vs speech (0.0-1.0).<br/>
    /// - <b>Range</b>: 0.0 to 1.0<br/>
    /// - <b>Default</b>: 0.6<br/>
    /// </para>
    ///
    /// <para type="description">
    /// -Prompt &lt;String&gt;<br/>
    /// Optional text prompt to guide the model's transcription style.
    /// </para>
    ///
    /// <para type="description">
    /// -SuppressRegex &lt;String&gt;<br/>
    /// Regular expression pattern to suppress specific tokens from output.
    /// </para>
    ///
    /// <para type="description">
    /// -TokenTimestampsSumThreshold &lt;Single&gt;<br/>
    /// Sum threshold for token timestamps when WithTokenTimestamps is enabled.<br/>
    /// - <b>Default</b>: 0.5<br/>
    /// </para>
    ///
    /// <para type="description">
    /// -MaxTokensPerSegment &lt;Int32&gt;<br/>
    /// Maximum number of tokens per transcription segment.
    /// </para>
    ///
    /// <para type="description">
    /// -AudioContextSize &lt;Int32&gt;<br/>
    /// Size of the audio context window for processing.
    /// </para>
    ///
    /// <para type="description">
    /// -MaxDuration &lt;TimeSpan&gt;<br/>
    /// Maximum duration of audio to process.
    /// </para>
    ///
    /// <para type="description">
    /// -Offset &lt;TimeSpan&gt;<br/>
    /// Time offset to start processing audio from.
    /// </para>
    ///
    /// <para type="description">
    /// -MaxLastTextTokens &lt;Int32&gt;<br/>
    /// Maximum number of last text tokens to consider for context.
    /// </para>
    ///
    /// <para type="description">
    /// -MaxSegmentLength &lt;Int32&gt;<br/>
    /// Maximum length of each transcription segment.
    /// </para>
    ///
    /// <para type="description">
    /// -MaxInitialTimestamp &lt;TimeSpan&gt;<br/>
    /// Start timestamps at this moment in the audio.
    /// </para>
    ///
    /// <para type="description">
    /// -LengthPenalty &lt;Single&gt;<br/>
    /// Penalty applied to longer segments (0.0-1.0).<br/>
    /// - <b>Range</b>: 0.0 to 1.0<br/>
    /// </para>
    ///
    /// <para type="description">
    /// -EntropyThreshold &lt;Single&gt;<br/>
    /// Entropy threshold for segment quality assessment (0.0-1.0).<br/>
    /// - <b>Range</b>: 0.0 to 1.0<br/>
    /// </para>
    ///
    /// <para type="description">
    /// -LogProbThreshold &lt;Single&gt;<br/>
    /// Log probability threshold for filtering low-confidence segments (0.0-1.0).<br/>
    /// - <b>Range</b>: 0.0 to 1.0<br/>
    /// </para>
    ///
    /// <para type="description">
    /// -Passthru [&lt;SwitchParameter&gt;]<br/>
    /// Returns SegmentData objects instead of plain text strings.
    /// </para>
    ///
    /// <para type="description">
    /// -WithTokenTimestamps [&lt;SwitchParameter&gt;]<br/>
    /// Includes precise timestamps for each token in the output.
    /// </para>
    ///
    /// <para type="description">
    /// -SplitOnWord [&lt;SwitchParameter&gt;]<br/>
    /// Splits transcription segments on word boundaries.
    /// </para>
    ///
    /// <para type="description">
    /// -WithTranslate [&lt;SwitchParameter&gt;]<br/>
    /// Translates non-English audio to English.
    /// </para>
    ///
    /// <para type="description">
    /// -WithProgress [&lt;SwitchParameter&gt;]<br/>
    /// Shows progress indicator during processing.
    /// </para>
    ///
    /// <para type="description">
    /// -DontSuppressBlank [&lt;SwitchParameter&gt;]<br/>
    /// Includes blank/silent segments in output.
    /// </para>
    ///
    /// <para type="description">
    /// -SingleSegmentOnly [&lt;SwitchParameter&gt;]<br/>
    /// Forces output as a single segment instead of multiple segments.
    /// </para>
    ///
    /// <para type="description">
    /// -PrintSpecialTokens [&lt;SwitchParameter&gt;]<br/>
    /// Includes special tokens (like timestamp markers) in output.
    /// </para>
    ///
    /// <para type="description">
    /// -NoContext [&lt;SwitchParameter&gt;]<br/>
    /// Disables context from previous segments.
    /// </para>
    ///
    /// <para type="description">
    /// -WithBeamSearchSamplingStrategy [&lt;SwitchParameter&gt;]<br/>
    /// Uses beam search sampling strategy for improved accuracy.
    /// </para>
    ///
    /// <example>
    /// <para>Basic speech-to-text conversion</para>
    /// <para>
    /// Transcribes an audio file to text using default settings.
    /// </para>
    /// <code>
    /// Get-SpeechToText -Input "C:\audio\recording.wav"
    /// </code>
    /// </example>
    ///
    /// <example>
    /// <para>Process multiple files with pipeline input</para>
    /// <para>
    /// Transcribes all WAV files in a directory.
    /// </para>
    /// <code>
    /// Get-ChildItem "C:\audio\*.wav" | Get-SpeechToText
    /// </code>
    /// </example>
    ///
    /// <example>
    /// <para>Translate Spanish audio to English</para>
    /// <para>
    /// Transcribes Spanish audio and translates it to English.
    /// </para>
    /// <code>
    /// Get-SpeechToText -Input "audio.mp3" -LanguageIn "es" -WithTranslate
    /// </code>
    /// </example>
    ///
    /// <example>
    /// <para>Get detailed segment data with timestamps</para>
    /// <para>
    /// Returns SegmentData objects with precise timing information.
    /// </para>
    /// <code>
    /// Get-SpeechToText -Input "recording.wav" -Passthru -WithTokenTimestamps
    /// </code>
    /// </example>
    /// </summary>
    [Cmdlet(VerbsCommon.Get, "SpeechToText")]
    [OutputType(typeof(string), typeof(SegmentData))]
    public class GetSpeechToText : PSCmdlet
    {
 
        #region Cmdlet Parameters
 
        /// <summary>
        /// Directory path where Whisper model files are stored or downloaded.
        /// BeginProcessing replaces this value with a "GenXdev.PowerShell"
        /// folder under LOCALAPPDATA when it is empty or does not point to an
        /// existing directory.
        /// </summary>
        [Parameter(
            Mandatory = false,
            HelpMessage = "Path to the model file directory")]
        public string ModelFileDirectoryPath { get; set; }

        /// <summary>
        /// Audio file path, FileInfo object, or any audio format supported by Whisper.
        /// Declared as object so that strings, FileInfo instances and PSObject
        /// wrappers around either can be bound (from the pipeline or by
        /// position 0); ProcessRecord converts the value to a path string via
        /// GetFilePathFromInput.
        /// </summary>
        [Alias("WaveFile")]
        [Parameter(
            Mandatory = true,
            Position = 0,
            ValueFromPipeline = true,
            HelpMessage = "Audio file path, FileInfo object, or any audio " +
                          "format supported by Whisper.")]
        public object Input { get; set; }
 
        /// <summary>
        /// Input language code for speech recognition
        /// </summary>
        [Parameter(
            Mandatory = false,
            HelpMessage = "Sets the input language to detect, defaults to 'en'")]
        public string LanguageIn { get; set; } = "en";
 
        /// <summary>
        /// Number of CPU threads to use for processing
        /// </summary>
        [Parameter(
            Mandatory = false,
            HelpMessage = "Sets the output language")]
        public int CpuThreads { get; set; } = 0;
 
        /// <summary>
        /// Temperature for speech detection (0.0-1.0).
        /// Null when the parameter is not specified.
        /// </summary>
        [Parameter(
            Mandatory = false,
            HelpMessage = "Temperature for speech detection")]
        [ValidateRange(0, 1)]
        public float? Temperature { get; set; }

        /// <summary>
        /// Temperature increment for fallback attempts (0.0-1.0).
        /// Null when the parameter is not specified.
        /// </summary>
        [Parameter(
            Mandatory = false,
            HelpMessage = "Temperature increment")]
        [ValidateRange(0, 1)]
        public float? TemperatureInc { get; set; }

        /// <summary>
        /// Threshold for detecting silence vs speech (0.0-1.0).
        /// Null when the parameter is not specified.
        /// </summary>
        [Parameter(
            Mandatory = false,
            HelpMessage = "No speech threshold")]
        [ValidateRange(0, 1)]
        public float? NoSpeechThreshold { get; set; }

        /// <summary>
        /// Optional text prompt to guide the model's transcription style
        /// </summary>
        [Parameter(
            Mandatory = false,
            HelpMessage = "Prompt to use for the model")]
        public string Prompt { get; set; }

        /// <summary>
        /// Regular expression pattern to suppress specific tokens from output
        /// </summary>
        [Parameter(
            Mandatory = false,
            HelpMessage = "Regex to suppress tokens from the output")]
        public string SuppressRegex { get; set; } = null;

        /// <summary>
        /// Sum threshold for token timestamps when WithTokenTimestamps is enabled.
        /// Defaults to 0.5.
        /// </summary>
        [Parameter(
            Mandatory = false,
            HelpMessage = "Sum threshold for token timestamps, defaults to 0.5")]
        public float TokenTimestampsSumThreshold { get; set; } = 0.5f;

        /// <summary>
        /// Maximum number of tokens per transcription segment.
        /// Null when the parameter is not specified.
        /// </summary>
        [Parameter(
            Mandatory = false,
            HelpMessage = "Maximum number of tokens per segment")]
        public int? MaxTokensPerSegment { get; set; }

        /// <summary>
        /// Size of the audio context window for processing.
        /// Null when the parameter is not specified.
        /// </summary>
        [Parameter(
            Mandatory = false,
            HelpMessage = "Size of the audio context")]
        public int? AudioContextSize { get; set; }

        /// <summary>
        /// Maximum duration of audio to process.
        /// Null when the parameter is not specified.
        /// </summary>
        [Parameter(
            Mandatory = false,
            HelpMessage = "Maximum duration of the audio")]
        public TimeSpan? MaxDuration { get; set; }

        /// <summary>
        /// Time offset to start processing audio from.
        /// Null when the parameter is not specified.
        /// </summary>
        [Parameter(
            Mandatory = false,
            HelpMessage = "Offset for the audio")]
        public TimeSpan? Offset { get; set; }

        /// <summary>
        /// Maximum number of last text tokens to consider for context.
        /// Null when the parameter is not specified.
        /// </summary>
        [Parameter(
            Mandatory = false,
            HelpMessage = "Maximum number of last text tokens")]
        public int? MaxLastTextTokens { get; set; }

        /// <summary>
        /// Maximum length of each transcription segment.
        /// Null when the parameter is not specified.
        /// </summary>
        [Parameter(
            Mandatory = false,
            HelpMessage = "Maximum segment length")]
        public int? MaxSegmentLength { get; set; }

        /// <summary>
        /// Start timestamps at this moment in the audio.
        /// Null when the parameter is not specified.
        /// </summary>
        // NOTE(review): the name suggests an upper bound on the first
        // timestamp rather than a start position - confirm against the
        // Whisper.net builder option this value is passed to.
        [Parameter(
            Mandatory = false,
            HelpMessage = "Start timestamps at this moment")]
        public TimeSpan? MaxInitialTimestamp { get; set; }

        /// <summary>
        /// Penalty applied to longer segments (0.0-1.0).
        /// Null when the parameter is not specified.
        /// </summary>
        // NOTE(review): whisper.cpp's documented default for this setting
        // appears to lie outside 0..1 - confirm that the ValidateRange below
        // does not prevent callers from supplying useful values.
        [Parameter(
            Mandatory = false,
            HelpMessage = "Length penalty")]
        [ValidateRange(0, 1)]
        public float? LengthPenalty { get; set; }

        /// <summary>
        /// Entropy threshold for segment quality assessment (0.0-1.0).
        /// Null when the parameter is not specified.
        /// </summary>
        // NOTE(review): whisper.cpp's documented default for this setting
        // appears to be greater than 1 - confirm that the ValidateRange below
        // does not prevent callers from supplying useful values.
        [Parameter(
            Mandatory = false,
            HelpMessage = "Entropy threshold")]
        [ValidateRange(0, 1)]
        public float? EntropyThreshold { get; set; }

        /// <summary>
        /// Log probability threshold for filtering low-confidence segments (0.0-1.0).
        /// Null when the parameter is not specified.
        /// </summary>
        // NOTE(review): log probabilities are normally negative, so a 0..1
        // range may exclude every meaningful threshold - confirm against the
        // Whisper.net / whisper.cpp documentation.
        [Parameter(
            Mandatory = false,
            HelpMessage = "Log probability threshold")]
        [ValidateRange(0, 1)]
        public float? LogProbThreshold { get; set; }

        /// <summary>
        /// Whisper model type to use for transcription.
        /// Defaults to GgmlType.LargeV3Turbo; BeginProcessing derives the model
        /// file name from this value and downloads the file when it is missing.
        /// </summary>
        [Parameter(
            Mandatory = false,
            HelpMessage = "Whisper model type to use, defaults to LargeV3Turbo")]
        public GgmlType ModelType { get; set; } = GgmlType.LargeV3Turbo;
 
        /// <summary>
        /// Returns SegmentData objects instead of plain text strings
        /// (both types are declared in the cmdlet's OutputType attribute)
        /// </summary>
        [Parameter(
            Mandatory = false,
            HelpMessage = "Returns objects instead of strings")]
        public SwitchParameter Passthru { get; set; }

        /// <summary>
        /// Includes precise timestamps for each token in the output
        /// </summary>
        [Parameter(
            Mandatory = false,
            HelpMessage = "Whether to include token timestamps")]
        public SwitchParameter WithTokenTimestamps { get; set; }

        /// <summary>
        /// Splits transcription segments on word boundaries
        /// </summary>
        [Parameter(
            Mandatory = false,
            HelpMessage = "Whether to split on word boundaries")]
        public SwitchParameter SplitOnWord { get; set; }

        /// <summary>
        /// Translates non-English audio to English
        /// </summary>
        [Parameter(
            Mandatory = false,
            HelpMessage = "Whether to translate the output")]
        public SwitchParameter WithTranslate { get; set; }

        /// <summary>
        /// Shows progress indicator during processing
        /// </summary>
        [Parameter(
            Mandatory = false,
            HelpMessage = "Whether to show progress")]
        public SwitchParameter WithProgress { get; set; }

        /// <summary>
        /// Includes blank/silent segments in output
        /// </summary>
        [Parameter(
            Mandatory = false,
            HelpMessage = "Whether to NOT suppress blank lines")]
        public SwitchParameter DontSuppressBlank { get; set; }

        /// <summary>
        /// Forces output as a single segment instead of multiple segments
        /// </summary>
        [Parameter(
            Mandatory = false,
            HelpMessage = "Whether to use single segment only")]
        public SwitchParameter SingleSegmentOnly { get; set; }

        /// <summary>
        /// Includes special tokens (like timestamp markers) in output
        /// </summary>
        [Parameter(
            Mandatory = false,
            HelpMessage = "Whether to print special tokens")]
        public SwitchParameter PrintSpecialTokens { get; set; }

        /// <summary>
        /// Disables context from previous segments
        /// </summary>
        [Parameter(
            Mandatory = false,
            HelpMessage = "Don't use context")]
        public SwitchParameter NoContext { get; set; }

        /// <summary>
        /// Uses beam search sampling strategy for improved accuracy
        /// </summary>
        [Parameter(
            Mandatory = false,
            HelpMessage = "Use beam search sampling strategy")]
        public SwitchParameter WithBeamSearchSamplingStrategy { get; set; }
 
        #endregion
 
        #region Private Fields
 
        // Thread-safe queue for storing transcription results; segments are
        // enqueued by the background task started in ProcessAudioFile
        private readonly ConcurrentQueue<SegmentData> _results = new();

        // Thread-safe queue for storing error records from background processing
        private readonly ConcurrentQueue<ErrorRecord> _errorQueue = new();

        // Thread-safe queue for storing verbose messages from background processing
        private readonly ConcurrentQueue<string> _verboseQueue = new();

        // Cancellation token source for aborting long-running operations;
        // created in BeginProcessing, cancelled and disposed in EndProcessing
        private CancellationTokenSource _cts;

        // Whisper processor instance for speech-to-text conversion;
        // built in BeginProcessing and shared by every ProcessRecord call
        private WhisperProcessor _processor;

        // Whisper factory instance (kept for proper disposal order:
        // EndProcessing disposes the processor first, then the factory)
        private WhisperFactory _whisperFactory;

        // Flag to track if resources have been disposed. It is set under
        // _disposeLock in EndProcessing and also read, without the lock, by
        // the background task in ProcessAudioFile to stop collecting segments.
        private bool _isDisposed = false;

        // Lock object for thread-safe disposal
        private readonly object _disposeLock = new object();
 
        #endregion
 
        #region Lifecycle Methods
 
        /// <summary>
        /// Begin processing - initializes Whisper model and processor
        /// </summary>
        protected override void BeginProcessing()
        {
 
            base.BeginProcessing();
 
            // Set default model directory if not specified or invalid
            if (string.IsNullOrEmpty(ModelFileDirectoryPath) ||
                !Directory.Exists(ModelFileDirectoryPath))
            {
 
                var localAppData =
                    System.Environment.GetEnvironmentVariable("LOCALAPPDATA");
 
                if (!string.IsNullOrEmpty(localAppData))
                {
 
                    ModelFileDirectoryPath =
                        Path.Combine(localAppData, "GenXdev.PowerShell");
                }
 
                // Create directory if it doesn't exist
                if (!Directory.Exists(ModelFileDirectoryPath))
                {
 
                    try
                    {
 
                        Directory.CreateDirectory(ModelFileDirectoryPath);
                    }
                    catch (Exception ex)
                    {
 
                        ThrowTerminatingError(new ErrorRecord(
                            ex,
                            "ModelPathCreationFailed",
                            ErrorCategory.ResourceUnavailable,
                            ModelFileDirectoryPath));
                    }
                }
            }
 
            // Set language to auto-detect if not explicitly specified
            if (!MyInvocation.BoundParameters.ContainsKey("LanguageIn"))
            {
 
                LanguageIn = "auto";
            }
 
            // Log all user-specified parameters
            WriteVerbose($"ModelFileDirectoryPath: {ModelFileDirectoryPath}");
 
            if (MyInvocation.BoundParameters.ContainsKey("Input"))
                WriteVerbose($"Input: {Input}");
 
            if (MyInvocation.BoundParameters.ContainsKey("Passthru"))
                WriteVerbose($"Passthru: {Passthru}");
 
            if (MyInvocation.BoundParameters.ContainsKey("WithTokenTimestamps"))
                WriteVerbose($"WithTokenTimestamps: {WithTokenTimestamps}");
 
            if (MyInvocation.BoundParameters.ContainsKey(
                "TokenTimestampsSumThreshold"))
                WriteVerbose(
                    $"TokenTimestampsSumThreshold: {TokenTimestampsSumThreshold}");
 
            if (MyInvocation.BoundParameters.ContainsKey("SplitOnWord"))
                WriteVerbose($"SplitOnWord: {SplitOnWord}");
 
            if (MyInvocation.BoundParameters.ContainsKey("MaxTokensPerSegment"))
                WriteVerbose($"MaxTokensPerSegment: {MaxTokensPerSegment}");
 
            if (MyInvocation.BoundParameters.ContainsKey("LanguageIn"))
                WriteVerbose($"LanguageIn: {LanguageIn}");
 
            if (MyInvocation.BoundParameters.ContainsKey("CpuThreads"))
                WriteVerbose($"CpuThreads: {CpuThreads}");
 
            if (MyInvocation.BoundParameters.ContainsKey("Temperature"))
                WriteVerbose($"Temperature: {Temperature}");
 
            if (MyInvocation.BoundParameters.ContainsKey("TemperatureInc"))
                WriteVerbose($"TemperatureInc: {TemperatureInc}");
 
            if (MyInvocation.BoundParameters.ContainsKey("WithTranslate"))
                WriteVerbose($"WithTranslate: {WithTranslate}");
 
            if (MyInvocation.BoundParameters.ContainsKey("Prompt"))
                WriteVerbose($"Prompt: {Prompt}");
 
            if (MyInvocation.BoundParameters.ContainsKey("SuppressRegex"))
                WriteVerbose($"SuppressRegex: {SuppressRegex}");
 
            if (MyInvocation.BoundParameters.ContainsKey("WithProgress"))
                WriteVerbose($"WithProgress: {WithProgress}");
 
            if (MyInvocation.BoundParameters.ContainsKey("AudioContextSize"))
                WriteVerbose($"AudioContextSize: {AudioContextSize}");
 
            if (MyInvocation.BoundParameters.ContainsKey("DontSuppressBlank"))
                WriteVerbose($"DontSuppressBlank: {DontSuppressBlank}");
 
            if (MyInvocation.BoundParameters.ContainsKey("MaxDuration"))
                WriteVerbose($"MaxDuration: {MaxDuration}");
 
            if (MyInvocation.BoundParameters.ContainsKey("Offset"))
                WriteVerbose($"Offset: {Offset}");
 
            if (MyInvocation.BoundParameters.ContainsKey("MaxLastTextTokens"))
                WriteVerbose($"MaxLastTextTokens: {MaxLastTextTokens}");
 
            if (MyInvocation.BoundParameters.ContainsKey("SingleSegmentOnly"))
                WriteVerbose($"SingleSegmentOnly: {SingleSegmentOnly}");
 
            if (MyInvocation.BoundParameters.ContainsKey("PrintSpecialTokens"))
                WriteVerbose($"PrintSpecialTokens: {PrintSpecialTokens}");
 
            if (MyInvocation.BoundParameters.ContainsKey("MaxSegmentLength"))
                WriteVerbose($"MaxSegmentLength: {MaxSegmentLength}");
 
            if (MyInvocation.BoundParameters.ContainsKey("MaxInitialTimestamp"))
                WriteVerbose($"MaxInitialTimestamp: {MaxInitialTimestamp}");
 
            if (MyInvocation.BoundParameters.ContainsKey("LengthPenalty"))
                WriteVerbose($"LengthPenalty: {LengthPenalty}");
 
            if (MyInvocation.BoundParameters.ContainsKey("EntropyThreshold"))
                WriteVerbose($"EntropyThreshold: {EntropyThreshold}");
 
            if (MyInvocation.BoundParameters.ContainsKey("LogProbThreshold"))
                WriteVerbose($"LogProbThreshold: {LogProbThreshold}");
 
            if (MyInvocation.BoundParameters.ContainsKey("NoSpeechThreshold"))
                WriteVerbose($"NoSpeechThreshold: {NoSpeechThreshold}");
 
            if (MyInvocation.BoundParameters.ContainsKey("NoContext"))
                WriteVerbose($"NoContext: {NoContext}");
 
            if (MyInvocation.BoundParameters.ContainsKey(
                "WithBeamSearchSamplingStrategy"))
                WriteVerbose(
                    $"WithBeamSearchSamplingStrategy: " +
                    $"{WithBeamSearchSamplingStrategy}");
 
            if (MyInvocation.BoundParameters.ContainsKey("ModelType"))
                WriteVerbose($"ModelType: {ModelType}");
 
            // Initialize cancellation token for aborting operations
            _cts = new CancellationTokenSource();
 
            // Construct full model file path
            var ggmlType = ModelType;
 
            var modelFileName = Path.GetFullPath(
                Path.Combine(ModelFileDirectoryPath, GetModelFileName(ModelType)));
 
            // Download model if not already present
            if (!File.Exists(modelFileName))
            {
 
                DownloadModel(modelFileName, ggmlType).GetAwaiter().GetResult();
            }
 
            // Initialize Whisper factory and processor
            _whisperFactory = WhisperFactory.FromPath(modelFileName);
 
            var builder = ConfigureWhisperBuilder(_whisperFactory.CreateBuilder());
 
            _processor = builder.Build();
        }
 
        /// <summary>
        /// Process record - transcribes each input audio file
        /// </summary>
        protected override void ProcessRecord()
        {
 
            base.ProcessRecord();
 
            // Extract file path from various input types
            string filePath = GetFilePathFromInput(Input);
 
            if (string.IsNullOrEmpty(filePath))
            {
 
                WriteError(new ErrorRecord(
                    new ArgumentException(
                        "Input parameter is required and must be a valid " +
                        "file path or FileInfo object."),
                    "MissingInput",
                    ErrorCategory.InvalidArgument,
                    Input));
 
                return;
            }
 
            // Validate that the audio file exists
            if (!File.Exists(filePath))
            {
 
                WriteError(new ErrorRecord(
                    new FileNotFoundException(
                        $"Audio file not found: {filePath}"),
                    "FileNotFound",
                    ErrorCategory.ObjectNotFound,
                    filePath));
 
                return;
            }
 
            WriteVerbose($"Processing audio file: {filePath}");
 
            // Process the audio file using the initialized processor
            ProcessAudioFile(filePath);
        }
 
        /// <summary>
        /// End processing - cleanup and dispose resources
        /// </summary>
        protected override void EndProcessing()
        {
 
            lock (_disposeLock)
            {
 
                if (_isDisposed) return;
 
                _isDisposed = true;
            }
 
            try
            {
 
                // Cancel any ongoing operations
                if (_cts != null && !_cts.IsCancellationRequested)
                {
 
                    _cts.Cancel();
                }
 
                // Dispose Whisper processor (must be disposed before factory)
                if (_processor != null)
                {
 
                    try
                    {
 
                        // Handle async disposable resources
                        if (_processor is IAsyncDisposable asyncDisposable)
                        {
 
                            asyncDisposable.DisposeAsync()
                                .AsTask()
                                .Wait(TimeSpan.FromSeconds(50));
                        }
                        else if (_processor is IDisposable disposable)
                        {
 
                            disposable.Dispose();
                        }
                    }
                    catch (Exception ex)
                    {
 
                        WriteVerbose(
                            $"Error disposing Whisper processor: {ex.Message}");
                    }
                    finally
                    {
 
                        _processor = null;
                    }
                }
 
                // Dispose Whisper factory
                if (_whisperFactory != null)
                {
 
                    try
                    {
 
                        _whisperFactory.Dispose();
                    }
                    catch (Exception ex)
                    {
 
                        WriteVerbose(
                            $"Error disposing Whisper factory: {ex.Message}");
                    }
                    finally
                    {
 
                        _whisperFactory = null;
                    }
                }
 
                // Dispose cancellation token source
                if (_cts != null)
                {
 
                    try
                    {
 
                        _cts.Dispose();
                    }
                    catch (Exception ex)
                    {
 
                        WriteVerbose(
                            $"Error disposing cancellation token source: " +
                            $"{ex.Message}");
                    }
                    finally
                    {
 
                        _cts = null;
                    }
                }
            }
            catch (Exception ex)
            {
 
                WriteVerbose($"Error in EndProcessing: {ex.Message}");
            }
 
            base.EndProcessing();
        }
 
        #endregion
 
 
        #region Private Helper Methods
 
        /// <summary>
        /// Extracts file path from various input object types
        /// </summary>
        /// <param name="input">Input object (string, FileInfo, or PSObject)</param>
        /// <returns>File path string or null if extraction fails</returns>
        private string GetFilePathFromInput(object input)
        {
 
            if (input == null) return null;
 
            // Handle FileInfo objects directly
            if (input is FileInfo fileInfo)
            {
 
                return fileInfo.FullName;
            }
 
            // Handle string paths
            if (input is string stringPath)
            {
 
                return stringPath;
            }
 
            // Handle PSObject wrappers around FileInfo or strings
            if (input is PSObject psObject)
            {
 
                var baseObject = psObject.BaseObject;
 
                if (baseObject is FileInfo fi)
                {
 
                    return fi.FullName;
                }
 
                if (baseObject is string str)
                {
 
                    return str;
                }
            }
 
            // Fallback: attempt to convert to string
            try
            {
 
                return input.ToString();
            }
            catch
            {
 
                return null;
            }
        }
 
        /// <summary>
        /// Processes an audio file and transcribes it to text segments
        /// </summary>
        /// <param name="filePath">Path to the audio file</param>
        private void ProcessAudioFile(string filePath)
        {
 
            using var audioStream = File.OpenRead(filePath);
 
            // Start async processing task in background
            var processingTask = Task.Run(async () =>
            {
 
                try
                {
 
                    // Process audio stream and collect segments
                    await foreach (var segment in
                        _processor.ProcessAsync(audioStream, _cts.Token))
                    {
 
                        // Check for cancellation or disposal
                        if (_cts.IsCancellationRequested || _isDisposed)
                            break;
 
                        // Filter out empty or blank audio segments
                        if (!string.IsNullOrWhiteSpace(segment.Text))
                        {
 
                            if (!(segment.Text.Trim(
                                "\r\n\t ".ToCharArray()) == "[BLANK_AUDIO]"))
                            {
 
                                _results.Enqueue(segment);
                            }
                        }
                    }
                }
                catch (OperationCanceledException)
                {
                }
                catch (Exception ex)
                {
 
                    _errorQueue.Enqueue(new ErrorRecord(
                        ex,
                        "ProcessingError",
                        ErrorCategory.OperationStopped,
                        null));
                }
            });
 
            Console.WriteLine("Processing audio file. Press Q to abort...");
 
            // Main processing loop with keyboard interrupt support
            while (!processingTask.IsCompleted)
            {
 
                try
                {
 
                    // Check for Q key to abort processing
                    if (Console.KeyAvailable &&
                        Console.ReadKey(true).Key == ConsoleKey.Q)
                    {
 
                        _cts.Cancel();
 
                        _errorQueue.Enqueue(new ErrorRecord(
                            new Exception("Processing aborted"),
                            "ProcessingAborted",
                            ErrorCategory.OperationStopped,
                            null));
 
                        break;
                    }
 
                    Thread.Sleep(100);
                }
                catch (Exception ex)
                {
 
                    WriteError(new ErrorRecord(
                        ex,
                        "ProcessingLoopError",
                        ErrorCategory.OperationStopped,
                        null));
 
                    break;
                }
            }
 
            // Wait for processing task to complete with timeout
            bool taskCompleted = false;
 
            try
            {
 
                // First attempt: graceful wait
                taskCompleted = processingTask.Wait(TimeSpan.FromSeconds(10));
 
                if (!taskCompleted)
                {
 
                    // Second attempt: cancel and wait again
                    _cts.Cancel();
 
                    taskCompleted = processingTask.Wait(TimeSpan.FromSeconds(5));
                }
            }
            catch (AggregateException ex)
            {
 
                // Handle task exceptions
                foreach (var innerEx in ex.InnerExceptions)
                {
 
                    if (!(innerEx is OperationCanceledException))
                    {
 
                        WriteVerbose(
                            $"Processing task error: {innerEx.Message}");
                    }
                }
            }
            catch (Exception ex)
            {
 
                WriteVerbose(
                    $"Error waiting for processing task: {ex.Message}");
            }
 
            // Collect all queued results
            int timeout = 0;
 
            List<SegmentData> allSegments = new List<SegmentData>();
 
            while (timeout < 50)
            {
 
                bool hasResults = false;
 
                // Process all queued error records
                while (_errorQueue.TryDequeue(out var errorRecord))
                {
 
                    WriteError(errorRecord);
 
                    hasResults = true;
                }
 
                // Process all queued verbose messages
                while (_verboseQueue.TryDequeue(out var verboseMessage))
                {
 
                    WriteVerbose(verboseMessage);
 
                    hasResults = true;
                }
 
                // Collect all transcription segments
                while (_results.TryDequeue(out var segment))
                {
 
                    allSegments.Add(segment);
 
                    hasResults = true;
                }
 
                // Exit if no more results and task completed
                if (!hasResults && taskCompleted)
                {
 
                    break;
                }
 
                Thread.Sleep(100);
 
                timeout++;
            }
 
            // Output all collected segments
            foreach (var segment in allSegments)
            {
 
                WriteObject(Passthru ? segment : segment.Text.Trim());
            }
        }
 
        /// <summary>
        /// Configures the Whisper processor builder with all specified parameters
        /// </summary>
        /// <param name="builder">Whisper processor builder instance</param>
        /// <returns>Configured builder instance</returns>
        private WhisperProcessorBuilder ConfigureWhisperBuilder(
            WhisperProcessorBuilder builder)
        {
 
            // Detect physical CPU core count for optimal threading
            int physicalCoreCount = 0;
 
            var searcher = new ManagementObjectSearcher(
                "select NumberOfCores from Win32_Processor");
 
            foreach (var item in searcher.Get())
            {
 
                physicalCoreCount += Convert.ToInt32(item["NumberOfCores"]);
            }
 
            // Set language and thread count
            builder.WithLanguage(LanguageIn)
                   .WithThreads(CpuThreads > 0 ? CpuThreads : physicalCoreCount);
 
            // Enable translation if language detection is active
            if (MyInvocation.BoundParameters.ContainsKey("LanguageIn"))
            {
 
                builder.WithTranslate();
            }
 
            // Configure temperature for speech detection consistency
            if (Temperature.HasValue)
            {
 
                builder.WithTemperature(Temperature.Value);
            }
            else
            {
 
                builder.WithTemperature(0.0f);
            }
 
            if (TemperatureInc.HasValue)
                builder.WithTemperatureInc(TemperatureInc.Value);
 
            if (WithTokenTimestamps.IsPresent)
                builder.WithTokenTimestamps()
                    .WithTokenTimestampsSumThreshold(TokenTimestampsSumThreshold);
 
            if (WithTranslate.IsPresent)
                builder.WithTranslate();
 
            if (!string.IsNullOrWhiteSpace(Prompt))
                builder.WithPrompt(Prompt);
 
            if (!string.IsNullOrWhiteSpace(SuppressRegex))
                builder.WithSuppressRegex(SuppressRegex);
 
            if (WithProgress.IsPresent)
            {
 
                builder.WithProgressHandler(progress =>
                    WriteProgress(new ProgressRecord(
                        1,
                        "Processing",
                        $"Progress: {progress}%")
                    {
                        PercentComplete = progress
                    }));
            }
 
            if (SplitOnWord.IsPresent)
                builder.SplitOnWord();
 
            if (MaxTokensPerSegment.HasValue)
                builder.WithMaxTokensPerSegment(MaxTokensPerSegment.Value);
 
            // Configure silence detection threshold
            if (NoSpeechThreshold.HasValue)
            {
 
                builder.WithNoSpeechThreshold(NoSpeechThreshold.Value);
            }
            else
            {
 
                builder.WithNoSpeechThreshold(0.6f);
            }
 
            if (AudioContextSize.HasValue)
                builder.WithAudioContextSize(AudioContextSize.Value);
 
            if (DontSuppressBlank.IsPresent)
                builder.WithoutSuppressBlank();
 
            if (MaxDuration.HasValue)
                builder.WithDuration(MaxDuration.Value);
 
            if (Offset.HasValue)
                builder.WithOffset(Offset.Value);
 
            if (MaxLastTextTokens.HasValue)
                builder.WithMaxLastTextTokens(MaxLastTextTokens.Value);
 
            if (SingleSegmentOnly.IsPresent)
                builder.WithSingleSegment();
 
            if (PrintSpecialTokens.IsPresent)
                builder.WithPrintSpecialTokens();
 
            if (MaxSegmentLength.HasValue)
                builder.WithMaxSegmentLength(MaxSegmentLength.Value);
 
            if (MaxInitialTimestamp.HasValue)
                builder.WithMaxInitialTs(
                    (int)MaxInitialTimestamp.Value.TotalSeconds);
 
            if (LengthPenalty.HasValue)
                builder.WithLengthPenalty(LengthPenalty.Value);
 
            if (EntropyThreshold.HasValue)
                builder.WithEntropyThreshold(EntropyThreshold.Value);
 
            if (LogProbThreshold.HasValue)
                builder.WithLogProbThreshold(LogProbThreshold.Value);
 
            if (NoContext.IsPresent)
                builder.WithNoContext();
 
            if (WithBeamSearchSamplingStrategy.IsPresent)
                builder.WithBeamSearchSamplingStrategy();
 
            return builder;
        }
 
        /// <summary>
        /// Downloads a Whisper model file if not already present
        /// </summary>
        /// <param name="fileName">Target file path for the model</param>
        /// <param name="ggmlType">Model type to download</param>
        private static async Task DownloadModel(string fileName, GgmlType ggmlType)
        {
 
            Console.WriteLine($"Downloading Model {fileName}");
 
            using var modelStream =
                await WhisperGgmlDownloader.GetGgmlModelAsync(ggmlType);
 
            using var fileWriter = File.OpenWrite(fileName);
 
            await modelStream.CopyToAsync(fileWriter);
        }
 
        /// <summary>
        /// Generates the standard model filename for a given model type
        /// </summary>
        /// <param name="modelType">Whisper model type</param>
        /// <returns>Model filename string</returns>
        private static string GetModelFileName(GgmlType modelType)
        {
 
            return $"ggml-{modelType}.bin";
        }
 
        #endregion
    }
}