// ReceiveRealTimeSpeechToText.cs

using System.Management.Automation;
using NAudio.Wave;
using Whisper.net;
using Whisper.net.Ggml;
using System.Management;
using System.Collections.Concurrent;
namespace GenXdev.Helpers
{
    [Cmdlet(VerbsCommunications.Receive, "RealTimeSpeechToText")]
    public class ReceiveRealTimeSpeechToText : PSCmdlet
    {
        #region Cmdlet Parameters
        [Parameter(Mandatory = false, HelpMessage = "Path to the model file")]
        public string ModelFileDirectoryPath { get; set; }
         
        [Parameter(Mandatory = false, HelpMessage = "Whether to use desktop audio capture instead of microphone")]
        public SwitchParameter UseDesktopAudioCapture { get; set; }
        [Parameter(Mandatory = false, HelpMessage = "Audio device name or GUID (supports wildcards, picks first match)")]
        public string AudioDevice { get; set; }
        [Parameter(Mandatory = false, HelpMessage = "Returns objects instead of strings")]
        public SwitchParameter Passthru { get; set; }
        [Parameter(Mandatory = false, HelpMessage = "Whether to include token timestamps")]
        public SwitchParameter WithTokenTimestamps { get; set; }
        [Parameter(Mandatory = false, HelpMessage = "Sum threshold for token timestamps, defaults to 0.5")]
        public float TokenTimestampsSumThreshold { get; set; } = 0.5f;
        [Parameter(Mandatory = false, HelpMessage = "Whether to split on word boundaries")]
        public SwitchParameter SplitOnWord { get; set; }
        [Parameter(Mandatory = false, HelpMessage = "Maximum number of tokens per segment")]
        public int? MaxTokensPerSegment { get; set; }
        [Parameter(Mandatory = false, HelpMessage = "Whether to ignore silence (will mess up timestamps)")]
        public SwitchParameter IgnoreSilence { get; set; }
        [Parameter(Mandatory = false, HelpMessage = "Maximum duration of silence before automatically stopping recording")]
        public TimeSpan? MaxDurationOfSilence { get; set; }
        [Parameter(Mandatory = false, HelpMessage = "Silence detect threshold (0..32767 defaults to 30)")]
        [ValidateRange(0, 32767)]
        public int? SilenceThreshold { get; set; }
        [Parameter(Mandatory = false, HelpMessage = "Sets the language to detect, defaults to 'en'")]
        public string Language { get; set; } = "en";
        [Parameter(Mandatory = false, HelpMessage = "Number of CPU threads to use, defaults to 0 (auto)")]
        public int CpuThreads { get; set; } = 0;
        [Parameter(Mandatory = false, HelpMessage = "Temperature for speech detection")]
        [ValidateRange(0, 1)]
        public float? Temperature { get; set; }
        [Parameter(Mandatory = false, HelpMessage = "Temperature increment")]
        [ValidateRange(0, 1)]
        public float? TemperatureInc { get; set; }
        [Parameter(Mandatory = false, HelpMessage = "Whether to translate the output")]
        public SwitchParameter WithTranslate { get; set; }
        [Parameter(Mandatory = false, HelpMessage = "Prompt to use for the model")]
        public string Prompt { get; set; }
        [Parameter(Mandatory = false, HelpMessage = "Regex to suppress tokens from the output")]
        public string SuppressRegex { get; set; } = null;
        [Parameter(Mandatory = false, HelpMessage = "Whether to show progress")]
        public SwitchParameter WithProgress { get; set; }
        [Parameter(Mandatory = false, HelpMessage = "Size of the audio context")]
        public int? AudioContextSize { get; set; }
        [Parameter(Mandatory = false, HelpMessage = "Whether to NOT suppress blank lines")]
        public SwitchParameter DontSuppressBlank { get; set; }
        [Parameter(Mandatory = false, HelpMessage = "Maximum duration of the audio")]
        public TimeSpan? MaxDuration { get; set; }
        [Parameter(Mandatory = false, HelpMessage = "Offset for the audio")]
        public TimeSpan? Offset { get; set; }
        [Parameter(Mandatory = false, HelpMessage = "Maximum number of last text tokens")]
        public int? MaxLastTextTokens { get; set; }
        [Parameter(Mandatory = false, HelpMessage = "Whether to use single segment only")]
        public SwitchParameter SingleSegmentOnly { get; set; }
        [Parameter(Mandatory = false, HelpMessage = "Whether to print special tokens")]
        public SwitchParameter PrintSpecialTokens { get; set; }
        [Parameter(Mandatory = false, HelpMessage = "Maximum segment length")]
        public int? MaxSegmentLength { get; set; }
        [Parameter(Mandatory = false, HelpMessage = "Start timestamps at this moment")]
        public TimeSpan? MaxInitialTimestamp { get; set; }
        [Parameter(Mandatory = false, HelpMessage = "Length penalty")]
        [ValidateRange(0, 1)]
        public float? LengthPenalty { get; set; }
        [Parameter(Mandatory = false, HelpMessage = "Entropy threshold")]
        [ValidateRange(0, 1)]
        public float? EntropyThreshold { get; set; }
        [Parameter(Mandatory = false, HelpMessage = "Log probability threshold")]
        [ValidateRange(0, 1)]
        public float? LogProbThreshold { get; set; }
        [Parameter(Mandatory = false, HelpMessage = "No speech threshold")]
        [ValidateRange(0, 1)]
        public float? NoSpeechThreshold { get; set; }
        [Parameter(Mandatory = false, HelpMessage = "Don't use context")]
        public SwitchParameter NoContext { get; set; }
        [Parameter(Mandatory = false, HelpMessage = "Use beam search sampling strategy")]
        public SwitchParameter WithBeamSearchSamplingStrategy { get; set; }
        [Parameter(Mandatory = false, HelpMessage = "Whisper model type to use, defaults to LargeV3Turbo")]
        public GgmlType ModelType { get; set; } = GgmlType.Small;
        #endregion
 
        private readonly ConcurrentQueue<SegmentData> _results = new();
        private readonly ConcurrentQueue<byte[]> _bufferQueue = new();
        private readonly ConcurrentQueue<ErrorRecord> _errorQueue = new();
        private readonly ConcurrentQueue<string> _verboseQueue = new();
        private CancellationTokenSource _cts;
        private WhisperProcessor _processor;
        private WhisperFactory _whisperFactory; // Keep reference for proper disposal
        private bool _isRecordingStarted = true;
        private bool _isDisposed = false;
        private readonly object _disposeLock = new object();
        private Task _processingTask;
 
        private int _audioCallbackCount = 0;
        private int _bufferQueueCount = 0;
 
        protected override void BeginProcessing()
        {
            base.BeginProcessing();
 
            if (string.IsNullOrEmpty(ModelFileDirectoryPath) || !Directory.Exists(ModelFileDirectoryPath))
            {
                // With this:
                var localAppData = System.Environment.GetEnvironmentVariable("LOCALAPPDATA");
                if (!string.IsNullOrEmpty(localAppData))
                {
                    ModelFileDirectoryPath = Path.Combine(localAppData, "GenXdev.PowerShell");
                }
 
                if (!Directory.Exists(ModelFileDirectoryPath))
                {
                    try
                    {
                        Directory.CreateDirectory(ModelFileDirectoryPath);
                    }
                    catch (Exception ex)
                    {
                        ThrowTerminatingError(new ErrorRecord(ex, "ModelPathCreationFailed", ErrorCategory.ResourceUnavailable, ModelFileDirectoryPath));
                    }
                }
            }
 
            WriteVerbose($"ModelFileDirectoryPath: {ModelFileDirectoryPath}");
            WriteVerbose($"UseDesktopAudioCapture: {UseDesktopAudioCapture}");
            WriteVerbose($"Passthru: {Passthru}");
            WriteVerbose($"WithTokenTimestamps: {WithTokenTimestamps}");
            WriteVerbose($"TokenTimestampsSumThreshold: {TokenTimestampsSumThreshold}");
            WriteVerbose($"SplitOnWord: {SplitOnWord}");
            WriteVerbose($"MaxTokensPerSegment: {MaxTokensPerSegment}");
            WriteVerbose($"IgnoreSilence: {IgnoreSilence}");
            WriteVerbose($"MaxDurationOfSilence: {MaxDurationOfSilence}");
            WriteVerbose($"SilenceThreshold: {SilenceThreshold}");
            WriteVerbose($"Language: {Language}");
            WriteVerbose($"CpuThreads: {CpuThreads}");
            WriteVerbose($"Temperature: {Temperature}");
            WriteVerbose($"TemperatureInc: {TemperatureInc}");
            WriteVerbose($"WithTranslate: {WithTranslate}");
            WriteVerbose($"Prompt: {Prompt}");
            WriteVerbose($"SuppressRegex: {SuppressRegex}");
            WriteVerbose($"WithProgress: {WithProgress}");
            WriteVerbose($"AudioContextSize: {AudioContextSize}");
            WriteVerbose($"DontSuppressBlank: {DontSuppressBlank}");
            WriteVerbose($"MaxDuration: {MaxDuration}");
            WriteVerbose($"Offset: {Offset}");
            WriteVerbose($"MaxLastTextTokens: {MaxLastTextTokens}");
            WriteVerbose($"SingleSegmentOnly: {SingleSegmentOnly}");
            WriteVerbose($"PrintSpecialTokens: {PrintSpecialTokens}");
            WriteVerbose($"MaxSegmentLength: {MaxSegmentLength}");
            WriteVerbose($"MaxInitialTimestamp: {MaxInitialTimestamp}");
            WriteVerbose($"LengthPenalty: {LengthPenalty}");
            WriteVerbose($"EntropyThreshold: {EntropyThreshold}");
            WriteVerbose($"LogProbThreshold: {LogProbThreshold}");
            WriteVerbose($"NoSpeechThreshold: {NoSpeechThreshold}");
            WriteVerbose($"NoContext: {NoContext}");
            WriteVerbose($"WithBeamSearchSamplingStrategy: {WithBeamSearchSamplingStrategy}");
            WriteVerbose($"ModelType: {ModelType}");
            _cts = new CancellationTokenSource();
        }
        protected override void ProcessRecord()
        {
            base.ProcessRecord();
             
            // Initialize Whisper
            var ggmlType = ModelType;
            var modelFileName = Path.GetFullPath(Path.Combine(ModelFileDirectoryPath, GetModelFileName(ModelType)));
             
            if (!File.Exists(modelFileName))
            {
                DownloadModel(modelFileName, ggmlType).GetAwaiter().GetResult();
            }
             
            _whisperFactory = WhisperFactory.FromPath(modelFileName);
            var builder = ConfigureWhisperBuilder(_whisperFactory.CreateBuilder());
            _processor = builder.Build();
 
            IWaveIn waveIn = CreateAudioInput();
             
            using (waveIn)
            {
                waveIn.WaveFormat = new WaveFormat(16000, 1);
                 
                var processingTask = Task.Run(() => ProcessAudioBuffer());
                _processingTask = processingTask; // Store reference for proper disposal
                 
                // Variables for silence detection - matching exactly the GetSpeechToText implementation
                bool hadAudio = false;
                bool everHadAudio = false;
                double totalSilenceSeconds = 0;
                double seconds = 0;
                double sum = 0;
                long count = 0;
                int threshold = SilenceThreshold.HasValue ? SilenceThreshold.Value : 30;
                
                using MemoryStream wavBufferStream = new MemoryStream();
                object syncLock = new object();
                waveIn.DataAvailable += (sender, args) =>
                {
                    if (!_isRecordingStarted || _isDisposed) return;
                     
                    _audioCallbackCount++;
                     
                    lock (syncLock)
                    {
                        if (!_isRecordingStarted || _isDisposed) return;
                         
                        // Validate buffer bounds before processing
                        if (args.Buffer == null || args.BytesRecorded <= 0 || args.BytesRecorded > args.Buffer.Length)
                        {
                            return;
                        }
                         
                        if (MaxDurationOfSilence.HasValue || IgnoreSilence.IsPresent)
                        {
                            seconds += args.BytesRecorded / 32000d;
                            count += args.BytesRecorded / 2;
                            unsafe
                            {
                                fixed (byte* buffer = args.Buffer)
                                {
                                    var floatBuffer = (Int16*)buffer;
                                    var sampleCount = Math.Min(args.BytesRecorded / 2, args.Buffer.Length / 2);
                                    for (var i = 0; i < sampleCount; i++)
                                    {
                                        sum += Math.Abs(floatBuffer[i]);
                                    }
                                }
                            }
                            wavBufferStream.Write(args.Buffer, 0, args.BytesRecorded);
                            wavBufferStream.Flush();
                            var current = (sum / count);
                             
                            if (current > threshold)
                            {
                                hadAudio = true;
                                totalSilenceSeconds = 0;
                                everHadAudio = true;
                            }
                            if (seconds > 0.85)
                            {
                                if (!_isRecordingStarted) return;
                                if (current < threshold)
                                {
                                    totalSilenceSeconds += seconds;
                                     
                                    if (everHadAudio && MaxDurationOfSilence.HasValue && (totalSilenceSeconds > MaxDurationOfSilence.Value.TotalSeconds))
                                    {
                                        // Max duration of silence reached
                                        _isRecordingStarted = false;
                                        _cts.Cancel();
                                        return;
                                    }
                                    if (IgnoreSilence.IsPresent && !hadAudio)
                                    {
                                        // Ignoring silence
                                        count = 0;
                                        sum = 0;
                                        seconds = 0;
                                        hadAudio = false;
                                        wavBufferStream.Position = 0;
                                        wavBufferStream.SetLength(0);
                                        return;
                                    }
                                    hadAudio = false;
                                }
                                // Add buffer to queue for processing
                                wavBufferStream.Position = 0;
                                var buffer = new byte[wavBufferStream.Length];
                                wavBufferStream.Read(buffer, 0, buffer.Length);
                                _bufferQueue.Enqueue(buffer);
                                _bufferQueueCount++;
                                 
                                wavBufferStream.Position = 0;
                                wavBufferStream.SetLength(0);
                                count = 0;
                                sum = 0;
                                seconds = 0;
                            }
                        }
                        else
                        {
                            // When not using silence detection, directly add to buffer queue
                            if (args.BytesRecorded > 0 && args.Buffer != null)
                            {
                                var buffer = new byte[args.BytesRecorded];
                                Array.Copy(args.Buffer, buffer, args.BytesRecorded);
                                _bufferQueue.Enqueue(buffer);
                                _bufferQueueCount++;
                            }
                        }
                    }
                };
                 
                waveIn.StartRecording();
                Console.WriteLine("Recording started. Press Q to stop...");
                var startTime = System.DateTime.UtcNow;
                while (!_cts.IsCancellationRequested && _isRecordingStarted)
                {
                    try
                    {
                        if (Console.KeyAvailable)
                        {
                            var keyInfo = Console.ReadKey(true);
                            if (keyInfo.Key == ConsoleKey.Q)
                            {
                                _isRecordingStarted = false;
                                _cts.Cancel();
                                break;
                            }
                        }
                        if (MaxDuration.HasValue && (System.DateTime.UtcNow - startTime) > MaxDuration.Value)
                        {
                            Console.WriteLine($"Max recording time of {MaxDuration.Value.TotalSeconds} seconds reached.");
                            _isRecordingStarted = false;
                            _cts.Cancel();
                            break;
                        }
                         
                        // Process all queued messages in the main thread
                        while (_errorQueue.TryDequeue(out var errorRecord))
                        {
                            WriteError(errorRecord);
                        }
                         
                        while (_verboseQueue.TryDequeue(out var verboseMessage))
                        {
                            WriteVerbose (verboseMessage);
                        }
                         
                        while (_results.TryDequeue(out var segment))
                        {
                            WriteObject(Passthru ? segment : segment.Text.Trim());
                        }
                         
                        Thread.Sleep(100);
                    }
                    catch (InvalidOperationException)
                    {
                        // Console input not available, continue
                        Thread.Sleep(100);
                    }
                    catch (Exception ex)
                    {
                        WriteError(new ErrorRecord(ex, "MainLoopError", ErrorCategory.OperationStopped, null));
                        break;
                    }
                }
                // Move cursor up one line and clear it for consistent UI
                // Console.WriteLine("Recording stopped, processing remaining audio...");
                 
                try
                {
                    waveIn.StopRecording();
                }
                catch (Exception ex)
                {
                    WriteVerbose($"Error stopping wave input: {ex.Message}");
                }
                 
                // Wait for processing to complete with proper timeout and error handling
                bool taskCompleted = false;
                try
                {
                    // First try to wait gracefully
                    taskCompleted = processingTask.Wait(TimeSpan.FromSeconds(5));
                     
                    if (!taskCompleted)
                    {
                        // If task doesn't complete, cancel and wait a bit more
                        _cts.Cancel();
                        taskCompleted = processingTask.Wait(TimeSpan.FromSeconds(3));
                    }
                }
                catch (AggregateException ex)
                {
                    // Handle task exceptions
                    foreach (var innerEx in ex.InnerExceptions)
                    {
                        if (!(innerEx is OperationCanceledException))
                        {
                            WriteVerbose($"Processing task error: {innerEx.Message}");
                        }
                    }
                }
                catch (Exception ex)
                {
                    WriteVerbose($"Error waiting for processing task: {ex.Message}");
                }
                 
                // Process any remaining results regardless of task completion
                int timeout = 0;
                while (timeout < 50) // Max 5 seconds
                {
                    bool hasResults = false;
                     
                    // Process all queued messages in the main thread during cleanup
                    while (_errorQueue.TryDequeue(out var errorRecord))
                    {
                        WriteError(errorRecord);
                        hasResults = true;
                    }
                     
                    while (_verboseQueue.TryDequeue(out var verboseMessage))
                    {
                        hasResults = true;
                    }
                     
                    while (_results.TryDequeue(out var segment))
                    {
                        WriteObject(Passthru ? segment : segment.Text.Trim());
                        hasResults = true;
                    }
                     
                    if (!hasResults && taskCompleted)
                    {
                        break; // No more results and task is done
                    }
                     
                    Thread.Sleep(100);
                    timeout++;
                }
            }
        }
        private IWaveIn CreateAudioInput()
        {
            if (UseDesktopAudioCapture.IsPresent)
            {
                if (!string.IsNullOrWhiteSpace(AudioDevice))
                {
                    WriteVerbose($"Looking for desktop audio device matching: {AudioDevice}");
                    // For desktop audio capture, we use the default device but can log the attempt
                    WriteWarning($"Desktop audio device selection by name is not supported in this NAudio version. Using default desktop audio capture.");
                }
                return new WasapiLoopbackCapture();
            }
            else
            {
                if (!string.IsNullOrWhiteSpace(AudioDevice))
                {
                    // Find microphone device by name/GUID with wildcard support
                    WriteVerbose($"Looking for microphone device matching: {AudioDevice}");
                     
                    for (int i = 0; i < WaveIn.DeviceCount; i++)
                    {
                        try
                        {
                            var deviceInfo = WaveIn.GetCapabilities(i);
                            if (IsDeviceMatch(deviceInfo.ProductName, AudioDevice) ||
                                IsDeviceMatch(deviceInfo.ProductGuid.ToString(), AudioDevice))
                            {
                                WriteVerbose($"Selected microphone device: {deviceInfo.ProductName}");
                                var waveIn = new WaveInEvent { DeviceNumber = i };
                                return waveIn;
                            }
                        }
                        catch (Exception ex)
                        {
                            WriteVerbose($"Could not check device {i}: {ex.Message}");
                        }
                    }
                    WriteWarning($"Microphone device '{AudioDevice}' not found, using default");
                }
                return new WaveInEvent();
            }
        }
 
        private bool IsDeviceMatch(string deviceName, string pattern)
        {
            if (string.IsNullOrWhiteSpace(deviceName) || string.IsNullOrWhiteSpace(pattern))
                return false;
 
            // Convert wildcards to regex pattern
            string regexPattern = "^" + System.Text.RegularExpressions.Regex.Escape(pattern)
                .Replace("\\*", ".*")
                .Replace("\\?", ".") + "$";
 
            return System.Text.RegularExpressions.Regex.IsMatch(deviceName, regexPattern,
                System.Text.RegularExpressions.RegexOptions.IgnoreCase);
        }
 
        private WhisperProcessorBuilder ConfigureWhisperBuilder(WhisperProcessorBuilder builder)
        {
            int physicalCoreCount = 0;
            var searcher = new ManagementObjectSearcher("select NumberOfCores from Win32_Processor");
            foreach (var item in searcher.Get())
            {
                physicalCoreCount += Convert.ToInt32(item["NumberOfCores"]);
            }
             
            builder.WithLanguage(Language)
                   .WithThreads(CpuThreads > 0 ? CpuThreads : physicalCoreCount);
             
            // Improved speech detection settings
            if (Temperature.HasValue)
            {
                builder.WithTemperature(Temperature.Value);
            }
            else
            {
                builder.WithTemperature(0.0f); // Lower temperature for more consistent results
            }
             
            if (TemperatureInc.HasValue) builder.WithTemperatureInc(TemperatureInc.Value);
            if (WithTokenTimestamps.IsPresent) builder.WithTokenTimestamps().WithTokenTimestampsSumThreshold(TokenTimestampsSumThreshold);
            if (WithTranslate.IsPresent) builder.WithTranslate();
            if (!string.IsNullOrWhiteSpace(Prompt)) builder.WithPrompt(Prompt);
            if (!string.IsNullOrWhiteSpace(SuppressRegex)) builder.WithSuppressRegex(SuppressRegex);
            if (WithProgress.IsPresent) builder.WithProgressHandler(progress => WriteProgress(new ProgressRecord(1, "Processing", $"Progress: {progress}%") { PercentComplete = progress }));
            if (SplitOnWord.IsPresent) builder.SplitOnWord();
            if (MaxTokensPerSegment.HasValue) builder.WithMaxTokensPerSegment(MaxTokensPerSegment.Value);
             
            // Improved silence/speech detection
            if (IgnoreSilence.IsPresent)
            {
                builder.WithNoSpeechThreshold(0.4f); // More sensitive to speech (lower = more sensitive)
            }
            else if (NoSpeechThreshold.HasValue)
            {
                builder.WithNoSpeechThreshold(NoSpeechThreshold.Value);
            }
            else
            {
                builder.WithNoSpeechThreshold(0.6f); // Default Whisper threshold
            }
             
            if (AudioContextSize.HasValue) builder.WithAudioContextSize(AudioContextSize.Value);
            if (DontSuppressBlank.IsPresent) builder.WithoutSuppressBlank();
            if (MaxDuration.HasValue) builder.WithDuration(MaxDuration.Value);
            if (Offset.HasValue) builder.WithOffset(Offset.Value);
            if (MaxLastTextTokens.HasValue) builder.WithMaxLastTextTokens(MaxLastTextTokens.Value);
            if (SingleSegmentOnly.IsPresent) builder.WithSingleSegment();
            if (PrintSpecialTokens.IsPresent) builder.WithPrintSpecialTokens();
            if (MaxSegmentLength.HasValue) builder.WithMaxSegmentLength(MaxSegmentLength.Value);
            if (MaxInitialTimestamp.HasValue) builder.WithMaxInitialTs((int)MaxInitialTimestamp.Value.TotalSeconds);
            if (LengthPenalty.HasValue) builder.WithLengthPenalty(LengthPenalty.Value);
            if (EntropyThreshold.HasValue) builder.WithEntropyThreshold(EntropyThreshold.Value);
            if (LogProbThreshold.HasValue) builder.WithLogProbThreshold(LogProbThreshold.Value);
            if (NoContext.IsPresent) builder.WithNoContext();
            if (WithBeamSearchSamplingStrategy.IsPresent) builder.WithBeamSearchSamplingStrategy();
             
            return builder;
        }
        private async Task ProcessAudioBuffer()
        {
            using var processingStream = new MemoryStream();
            bool isProcessing = false;
            int processedCount = 0;
             
            try
            {
                while ((!_cts.IsCancellationRequested || _bufferQueue.Count > 0) && !_isDisposed)
                {
                    try
                    {
                        if (_bufferQueue.TryDequeue(out var buffer))
                        {
                            // Validate buffer before processing
                            if (buffer == null || buffer.Length == 0)
                            {
                                continue;
                            }
                             
                            processingStream.Write(buffer, 0, buffer.Length);
                             
                            // Increased threshold for better speech recognition
                            // 48000 bytes = ~3 seconds of audio (16kHz * 1 channel * 2 bytes * 3 seconds)
                            if (!isProcessing && processingStream.Length >= 48000)
                            {
                                var audioDurationSeconds = processingStream.Length / 32000.0; // 16kHz * 2 bytes
                                isProcessing = true;
                                 
                                // Convert raw PCM data to WAV format that Whisper can understand
                                using var wavStream = ConvertPcmToWav(processingStream.ToArray(), 16000, 1, 16);
                                 
                                wavStream.Position = 0;
                                try
                                {
                                    int segmentCount = 0;
                                    await foreach (var segment in _processor.ProcessAsync(wavStream, _cts.Token))
                                    {
                                        if (_cts.IsCancellationRequested || _isDisposed)
                                        {
                                            break;
                                        }
                                         
                                        segmentCount++;
                                        if (!string.IsNullOrWhiteSpace(segment.Text))
                                        {
                                            if (!(segment.Text.Trim("\r\n\t ".ToCharArray()) == "[BLANK_AUDIO]"))
                                            {
                                                _results.Enqueue(segment);
                                            }
                                        }
                                    }
                                }
                                catch (OperationCanceledException)
                                {
                                    break;
                                }
                                catch (Exception ex) when (!_isDisposed)
                                {
                                    // Queue error for main thread to process
                                    _errorQueue.Enqueue(new ErrorRecord(ex, "WhisperProcessingError", ErrorCategory.OperationStopped, null));
                                }
                                 
                                processingStream.SetLength(0);
                                isProcessing = false;
                            }
                        }
                        else
                        {
                            // If we have data but not enough for a full segment, process it anyway when stopping
                            if (!_isRecordingStarted && processingStream.Length > 0 && !isProcessing && !_isDisposed)
                            {
                                var audioDurationSeconds = processingStream.Length / 32000.0;
                                isProcessing = true;
                                 
                                // Convert raw PCM data to WAV format for final processing
                                using var wavStream = ConvertPcmToWav(processingStream.ToArray(), 16000, 1, 16);
                                 
                                wavStream.Position = 0;
                                try
                                {
                                    int segmentCount = 0;
                                    await foreach (var segment in _processor.ProcessAsync(wavStream, _cts.Token))
                                    {
                                        if (_cts.IsCancellationRequested || _isDisposed)
                                        {
                                            break;
                                        }
                                         
                                        segmentCount++;
                                        if (!string.IsNullOrWhiteSpace(segment.Text))
                                        {
                                            _results.Enqueue(segment);
                                        }
                                    }
                                }
                                catch (OperationCanceledException)
                                {
                                    break;
                                }
                                catch (Exception ex) when (!_isDisposed)
                                {
                                    // Queue error for main thread to process
                                    _errorQueue.Enqueue(new ErrorRecord(ex, "FinalProcessingError", ErrorCategory.OperationStopped, null));
                                }
                                 
                                processingStream.SetLength(0);
                                isProcessing = false;
                            }
                            await Task.Delay(50, _cts.Token);
                        }
                    }
                    catch (OperationCanceledException)
                    {
                        break;
                    }
                    catch (Exception ex) when (!(ex is OperationCanceledException) && !_isDisposed)
                    {
                        // Queue error for main thread to process instead of calling WriteError directly
                        _errorQueue.Enqueue(new ErrorRecord(ex, "ProcessingError", ErrorCategory.OperationStopped, null));
                        break;
                    }
                }
            }
            catch (Exception ex) when (!_isDisposed)
            {
            }
        }
        private MemoryStream ConvertPcmToWav(byte[] pcmData, int sampleRate, int channels, int bitsPerSample)
        {
            var wavStream = new MemoryStream();
             
            int bytesPerSample = bitsPerSample / 8;
            int byteRate = sampleRate * channels * bytesPerSample;
            int blockAlign = channels * bytesPerSample;
             
            // Write WAV header
            // "RIFF" chunk descriptor
            wavStream.Write(System.Text.Encoding.ASCII.GetBytes("RIFF"), 0, 4);
            wavStream.Write(BitConverter.GetBytes(36 + pcmData.Length), 0, 4); // File size - 8
            wavStream.Write(System.Text.Encoding.ASCII.GetBytes("WAVE"), 0, 4);
             
            // "fmt " sub-chunk
            wavStream.Write(System.Text.Encoding.ASCII.GetBytes("fmt "), 0, 4);
            wavStream.Write(BitConverter.GetBytes(16), 0, 4); // Sub-chunk size
            wavStream.Write(BitConverter.GetBytes((short)1), 0, 2); // Audio format (1 = PCM)
            wavStream.Write(BitConverter.GetBytes((short)channels), 0, 2); // Number of channels
            wavStream.Write(BitConverter.GetBytes(sampleRate), 0, 4); // Sample rate
            wavStream.Write(BitConverter.GetBytes(byteRate), 0, 4); // Byte rate
            wavStream.Write(BitConverter.GetBytes((short)blockAlign), 0, 2); // Block align
            wavStream.Write(BitConverter.GetBytes((short)bitsPerSample), 0, 2); // Bits per sample
             
            // "data" sub-chunk
            wavStream.Write(System.Text.Encoding.ASCII.GetBytes("data"), 0, 4);
            wavStream.Write(BitConverter.GetBytes(pcmData.Length), 0, 4); // Data size
            wavStream.Write(pcmData, 0, pcmData.Length); // The actual audio data
             
            return wavStream;
        }
        protected override void EndProcessing()
        {
            lock (_disposeLock)
            {
                if (_isDisposed) return;
                _isDisposed = true;
            }
             
             
            try
            {
                // Stop recording first to prevent new audio data
                _isRecordingStarted = false;
                 
                // Cancel any ongoing operations
                if (_cts != null && !_cts.IsCancellationRequested)
                {
                    _cts.Cancel();
                }
                 
                // Wait for processing task to complete with timeout
                if (_processingTask != null && !_processingTask.IsCompleted)
                {
                    try
                    {
                        var completed = _processingTask.Wait(TimeSpan.FromSeconds(50));
                        if (!completed)
                        {
                            WriteVerbose("Processing task did not complete within timeout");
                        }
                    }
                    catch (AggregateException ex)
                    {
                        // Expected when task is cancelled
                        WriteVerbose($"Processing task cancelled: {ex.InnerExceptions.FirstOrDefault()?.Message}");
                    }
                    catch (Exception ex)
                    {
                        WriteVerbose($"Error waiting for processing task: {ex.Message}");
                    }
                }
                 
                // Dispose Whisper processor first (it uses the factory)
                if (_processor != null)
                {
                    try
                    {
                        // Properly dispose async resources
                        if (_processor is IAsyncDisposable asyncDisposable)
                        {
                            // Wait for async disposal to complete
                            asyncDisposable.DisposeAsync().AsTask().Wait(TimeSpan.FromSeconds(50));
                        }
                        else if (_processor is IDisposable disposable)
                        {
                            disposable.Dispose();
                        }
                    }
                    catch (Exception ex)
                    {
                        WriteVerbose($"Error disposing Whisper processor: {ex.Message}");
                    }
                    finally
                    {
                        _processor = null;
                    }
                }
                 
                // Then dispose Whisper factory
                if (_whisperFactory != null)
                {
                    try
                    {
                        _whisperFactory.Dispose();
                    }
                    catch (Exception ex)
                    {
                        WriteVerbose($"Error disposing Whisper factory: {ex.Message}");
                    }
                    finally
                    {
                        _whisperFactory = null;
                    }
                }
                 
                // Finally dispose cancellation token source
                if (_cts != null)
                {
                    try
                    {
                        _cts.Dispose();
                    }
                    catch (Exception ex)
                    {
                        WriteVerbose($"Error disposing cancellation token source: {ex.Message}");
                    }
                    finally
                    {
                        _cts = null;
                    }
                }
            }
            catch (Exception ex)
            {
                WriteVerbose($"Error in EndProcessing: {ex.Message}");
            }
             
            base.EndProcessing();
        }
        private static async Task DownloadModel(string fileName, GgmlType ggmlType)
        {
            Console.WriteLine($"Downloading Model {fileName}");
            using var modelStream = await WhisperGgmlDownloader.GetGgmlModelAsync(ggmlType);
            using var fileWriter = File.OpenWrite(fileName);
            await modelStream.CopyToAsync(fileWriter);
        }
 
        private static string GetModelFileName(GgmlType modelType)
        {
            return $"ggml-{modelType}.bin";
        }
    }
}