# tool/Analysis/functions/Invoke-AnalyseEmails.ps1

function Invoke-AnalyseEmails {
    <#
    .SYNOPSIS
        AI-powered extraction of contacts, opportunity, and sentiment per email.

    .DESCRIPTION
        Reads email-manifest.json and for each email, calls an LLM to extract:
        - Participants (name, role, org, internal/external)
        - Opportunity (category, summary, stage)
        - Sentiment (tone, engagement, conclusion, signals)

        Each result is appended to an intermediate email-analyses.jsonl file as
        soon as it is available; once all emails are processed the JSONL is
        assembled into email-analyses.json and removed. Emails flagged with
        parse_error in the manifest are not sent to the LLM; they are recorded
        with an analysis_error instead. When at least one email is skipped or
        fails, the error list is also written to errors.json (always a JSON array).

    .PARAMETER ManifestPath
        Path to email-manifest.json from the gather stage. A missing or
        unreadable manifest raises a terminating error.

    .PARAMETER OutputPath
        Path to data/ directory where email-analyses.json will be written.

    .PARAMETER Model
        Override model. Default: uses AIUtilities configured default.

    .PARAMETER Provider
        Override provider (openai, gemini).

    .PARAMETER CachePath
        Optional cache location. When supplied, Invoke-PCCompletionCached is used
        instead of Invoke-PCCompletion, with a cache key derived from the email's
        file name.

    .PARAMETER OwnerDomains
        Email domains belonging to the owner company. They are listed in the
        system prompt, and any participant whose email domain matches is forced
        to type 'internal' after the LLM response is parsed.

    .PARAMETER OwnerProfile
        Optional object describing the owner company. Its name, industry and
        (if present) aliases[].name values are appended to the system prompt.

    .PARAMETER DelayMs
        Delay between API calls in milliseconds. Default: 800.

    .OUTPUTS
        PSCustomObject with total_analysed, total_errors, output_path.
    #>

    [CmdletBinding()]
    param(
        [Parameter(Mandatory)]
        [string]$ManifestPath,

        [Parameter(Mandatory)]
        [string]$OutputPath,

        [string]$Model,

        [string]$Provider,

        [string]$CachePath,

        [string[]]$OwnerDomains = @(),

        [PSCustomObject]$OwnerProfile,

        [int]$DelayMs = 800
    )

    # Load manifest. Fail loudly: without -ErrorAction Stop a missing manifest would
    # only emit a non-terminating error and an empty result file would be written.
    $manifest = Get-Content $ManifestPath -Raw -ErrorAction Stop | ConvertFrom-Json

    # System prompt is defined inline here (it is not read from a prompt file).
    $systemPrompt = @"
You are an expert business analyst extracting structured opportunity data from email threads. You identify participants, classify opportunities, assess sentiment, and determine conversation stage.

RULES:
- Return valid JSON only — no markdown fences, no preamble, no explanation
- Classify each participant as "internal" (the sender's organisation based on their email domain) or "external"
- Infer roles from email signatures, context, and domain expertise
- Opportunity category must be one or more of: Sales/BD, Partnership, Fundraising
- Conversation stage: initial (first contact), active (back-and-forth), stalled (no response implied), near-close (contract/meeting imminent)
- Sentiment is assessed from the FULL thread, not individual messages
- If information cannot be determined, use reasonable inference or "Unknown"
"@


    # Inject owner context if available
    if ($OwnerDomains.Count -gt 0) {
        $domainList = $OwnerDomains -join ', '
        $systemPrompt += "`n`nOWNER COMPANY DOMAINS (classify as internal):`n$domainList`nAny participant whose email domain matches one of these domains is INTERNAL — they work for the lead-mining company."
    }
    if ($OwnerProfile) {
        $systemPrompt += "`n`nOWNER COMPANY CONTEXT:`n- Company: $($OwnerProfile.name)`n- Industry: $($OwnerProfile.industry)"
        if ($OwnerProfile.aliases) {
            $aliasNames = ($OwnerProfile.aliases | ForEach-Object { $_.name }) -join ', '
            $systemPrompt += "`n- Also known as: $aliasNames"
        }
    }

    $errors = @()
    $jsonlPath = Join-Path $OutputPath 'email-analyses.jsonl'
    # Clear any existing JSONL from a previous partial run
    if (Test-Path $jsonlPath) { Remove-Item $jsonlPath -Force }
    $emailCount = 0

    foreach ($email in $manifest.emails) {
        # Skip emails that failed to parse (recorded, but never sent to the LLM)
        if ($email.parse_error) {
            $errorEntry = [ordered]@{
                file_path      = $email.file_path
                subject        = $null
                date           = $null
                participants   = @()
                opportunity    = $null
                sentiment      = $null
                analysis_error = "Email parse failed: $($email.parse_error)"
            }
            ($errorEntry | ConvertTo-Json -Depth 10 -Compress) | Add-Content -Path $jsonlPath -Encoding UTF8
            $errors += [ordered]@{ file = $email.file_path; error = "Skipped (parse error)" }
            continue
        }

        $emailCount++

        # Rate limiting: pause only BETWEEN consecutive API calls. Sleeping before
        # every call except the first avoids a pointless trailing delay, which the
        # previous "count < total" check caused whenever emails had been skipped.
        if ($emailCount -gt 1) {
            Start-Sleep -Milliseconds $DelayMs
        }

        # Assemble user prompt
        $subject = $email.headers.subject
        $date = $email.headers.date
        $from = "$($email.headers.from.name) <$($email.headers.from.email)>"
        $to = ($email.headers.to | ForEach-Object { "$($_.name) <$($_.email)>" }) -join ', '
        $cc = if ($email.headers.cc) { ($email.headers.cc | ForEach-Object { "$($_.name) <$($_.email)>" }) -join ', ' } else { '' }
        $body = $email.body.plain_text

        # Truncate very long bodies to stay within token limits
        if ($body.Length -gt 8000) {
            $body = $body.Substring(0, 8000) + "`n`n[... truncated ...]"
        }

        $attachmentsList = if ($email.attachments -and $email.attachments.Count -gt 0) {
            ($email.attachments | ForEach-Object { $_.filename }) -join ', '
        }
        else { '' }

        $userPrompt = @"
Analyse this email thread and extract structured opportunity data.

## Email Metadata
- Subject: $subject
- Date: $date
- From: $from
- To: $to
$(if ($cc) { "- CC: $cc" })
$(if ($attachmentsList) { "- Attachments: $attachmentsList" })

## Email Body
$body

## Required Output (JSON)
Return a JSON object with this exact structure:
{
  "participants": [
    {
      "name": "Full Name",
      "email": "email@example.com",
      "role": "Job Title or Unknown",
      "organisation": "Company Name or Unknown",
      "type": "internal|external"
    }
  ],
  "opportunity": {
    "category": ["Sales/BD"],
    "summary": "2-3 sentence summary of the opportunity and context",
    "stage": "initial|active|stalled|near-close"
  },
  "sentiment": {
    "overall_tone": "Warm|Neutral|Cool",
    "engagement_level": "Active mutual|One-sided|Passive",
    "conclusion": "follow-up planned|stalled|declined|near-close",
    "positive_signals": ["signal 1"],
    "negative_signals": []
  }
}
"@


        try {
            Write-Host " [$emailCount/$($manifest.emails.Count)] $($email.file_name)..." -NoNewline -ForegroundColor Gray

            $aiParams = @{
                SystemPrompt = $systemPrompt
                UserPrompt   = $userPrompt
                JsonMode     = $true
                MaxTokens    = 1200
                Temperature  = 0.1
            }
            if ($Model) { $aiParams.Model = $Model }
            if ($Provider) { $aiParams.Provider = $Provider }

            # Use cached variant if cache path provided
            if ($CachePath) {
                # Key = file name with unsafe characters replaced and the .eml/.emltpl suffix dropped
                $cacheKey = ($email.file_name -replace '[^\w\-\.]', '_') -replace '\.eml$|\.emltpl$', ''
                $aiParams.CachePath = $CachePath
                $aiParams.CacheKey = $cacheKey
                $response = Invoke-PCCompletionCached @aiParams
            }
            else {
                $response = Invoke-PCCompletion @aiParams
            }
            if (-not $response) { throw 'Empty response from AI' }

            # Sanitize JSON (handle Infinity/NaN from some providers)
            $response = $response -replace '(?<=[\s,:[\{])-?Infinity', 'null' -replace '(?<=[\s,:[\{])NaN', 'null'

            # Parse the JSON response
            $parsed = $response | ConvertFrom-Json

            # Deterministic owner domain override: the domain list wins over the LLM's classification
            if ($OwnerDomains.Count -gt 0 -and $parsed.participants) {
                foreach ($participant in $parsed.participants) {
                    if ($participant.email) {
                        $emailDomain = ([string]$participant.email -split '@')[-1].Trim().ToLower()
                        if ($emailDomain -in $OwnerDomains) {
                            if ($participant.PSObject.Properties['type']) {
                                $participant.type = 'internal'
                            }
                            else {
                                # The LLM omitted "type": assigning to a missing property on a
                                # ConvertFrom-Json object throws, so add the property instead.
                                $participant | Add-Member -NotePropertyName 'type' -NotePropertyValue 'internal'
                            }
                        }
                    }
                }
            }

            $successEntry = [ordered]@{
                file_path      = $email.file_path
                subject        = $subject
                date           = $date
                participants   = $parsed.participants
                opportunity    = $parsed.opportunity
                sentiment      = $parsed.sentiment
                analysis_error = $null
            }
            # Write incrementally to JSONL
            ($successEntry | ConvertTo-Json -Depth 10 -Compress) | Add-Content -Path $jsonlPath -Encoding UTF8

            Write-Host " ✓" -ForegroundColor Green
        }
        catch {
            # Any failure (API call, empty response, invalid JSON) is recorded per email
            # so one bad email does not abort the whole run.
            $errorEntry = [ordered]@{
                file_path      = $email.file_path
                subject        = $subject
                date           = $date
                participants   = @()
                opportunity    = $null
                sentiment      = $null
                analysis_error = $_.Exception.Message
            }
            ($errorEntry | ConvertTo-Json -Depth 10 -Compress) | Add-Content -Path $jsonlPath -Encoding UTF8
            $errors += [ordered]@{ file = $email.file_path; error = $_.Exception.Message }
            Write-Host " ✗ $($_.Exception.Message)" -ForegroundColor Red
        }
    }

    # Assemble final JSON from JSONL
    $analyses = @()
    if (Test-Path $jsonlPath) {
        $analyses = @(Get-Content $jsonlPath -Encoding UTF8 | Where-Object { $_.Trim() } | ForEach-Object { $_ | ConvertFrom-Json })
    }

    # @(...) guarantees an array, so .Count is reliable for zero or one matching entries
    $successful = @($analyses | Where-Object { $null -eq $_.analysis_error })

    $output = [ordered]@{
        metadata = [ordered]@{
            generated_at   = (Get-Date).ToString('o')
            model          = if ($Model) { $Model } else { 'provider-default' }
            total_analysed = $successful.Count
            total_errors   = $errors.Count
        }
        analyses = $analyses
    }

    $outputFile = Join-Path $OutputPath 'email-analyses.json'
    $output | ConvertTo-Json -Depth 10 | Set-Content -Path $outputFile -Encoding UTF8

    # Clean up JSONL
    if (Test-Path $jsonlPath) { Remove-Item $jsonlPath -Force }

    # Write errors separately if any
    if ($errors.Count -gt 0) {
        $errorsFile = Join-Path $OutputPath 'errors.json'
        # -InputObject keeps the array intact; piping would unroll a single-element
        # array and serialise one error as a bare object instead of a JSON array.
        ConvertTo-Json -InputObject $errors -Depth 5 | Set-Content -Path $errorsFile -Encoding UTF8
    }

    # Return summary
    [PSCustomObject]@{
        total_analysed = $output.metadata.total_analysed
        total_errors   = $errors.Count
        output_path    = $outputFile
    }
}