tool/Analysis/functions/Invoke-AnalyseEmails.ps1
|
function Invoke-AnalyseEmails {
    <#
    .SYNOPSIS
        AI-powered extraction of contacts, opportunity, and sentiment per email.

    .DESCRIPTION
        Reads email-manifest.json and, for each email, calls an LLM to extract:
          - Participants (name, role, org, internal/external)
          - Opportunity  (category, summary, stage)
          - Sentiment    (tone, engagement, conclusion, signals)

        Each result is appended to a temporary email-analyses.jsonl file as soon as it
        is available; once every email has been processed the JSONL file is folded into
        email-analyses.json and removed. Emails flagged with parse_error in the manifest
        are not sent to the LLM; they are recorded with an analysis_error instead.
        If any email failed, the failures are also written to errors.json.

    .PARAMETER ManifestPath
        Path to email-manifest.json from the gather stage.

    .PARAMETER OutputPath
        Path to the data/ directory where email-analyses.json will be written.
        Created if it does not exist.

    .PARAMETER Model
        Override model. When omitted, no Model is passed to the completion function
        and the output metadata records 'provider-default'.

    .PARAMETER Provider
        Override provider (openai, gemini).

    .PARAMETER CachePath
        Optional cache directory. When supplied, Invoke-PCCompletionCached is used with
        a cache key derived from the email file name; otherwise Invoke-PCCompletion is
        called directly.

    .PARAMETER OwnerDomains
        Email domains of the owner company. They are injected into the system prompt and
        also enforced after the LLM call: any participant whose email domain matches is
        forced to type 'internal'.

    .PARAMETER OwnerProfile
        Optional owner-company profile. The name, industry and aliases[].name properties
        are read and added to the system prompt.

    .PARAMETER DelayMs
        Delay between API calls in milliseconds. Default: 800.

    .OUTPUTS
        PSCustomObject with total_analysed, total_errors, output_path.
    #>
    [CmdletBinding()]
    param(
        [Parameter(Mandatory)]
        [string]$ManifestPath,

        [Parameter(Mandatory)]
        [string]$OutputPath,

        [string]$Model,
        [string]$Provider,
        [string]$CachePath,
        [string[]]$OwnerDomains = @(),
        [PSCustomObject]$OwnerProfile,
        [int]$DelayMs = 800
    )

    # Load manifest. Read explicitly as UTF-8 (matching how every other file in this
    # function is read/written) and fail loudly instead of silently producing an empty
    # result when the manifest is missing or empty.
    $manifest = Get-Content -LiteralPath $ManifestPath -Raw -Encoding UTF8 -ErrorAction Stop |
        ConvertFrom-Json
    if ($null -eq $manifest) {
        throw "Manifest '$ManifestPath' is empty or could not be parsed as JSON."
    }

    # @() normalises a missing or single-element 'emails' property to an array so that
    # .Count and the progress/rate-limit arithmetic below are reliable.
    $emails = @($manifest.emails)
    $totalEmails = $emails.Count

    # Make sure the output directory exists before the first incremental write.
    if (-not (Test-Path -LiteralPath $OutputPath -PathType Container)) {
        New-Item -ItemType Directory -Path $OutputPath -Force | Out-Null
    }

    # System prompt. NOTE: the prompt is defined inline here; a previous revision also
    # read prompts/parse-email.prompt.yaml but never used its contents, so that dead
    # read has been removed.
    $systemPrompt = @"
You are an expert business analyst extracting structured opportunity data from email threads.
You identify participants, classify opportunities, assess sentiment, and determine conversation stage.

RULES:
- Return valid JSON only — no markdown fences, no preamble, no explanation
- Classify each participant as "internal" (the sender's organisation based on their email domain) or "external"
- Infer roles from email signatures, context, and domain expertise
- Opportunity category must be one or more of: Sales/BD, Partnership, Fundraising
- Conversation stage: initial (first contact), active (back-and-forth), stalled (no response implied), near-close (contract/meeting imminent)
- Sentiment is assessed from the FULL thread, not individual messages
- If information cannot be determined, use reasonable inference or "Unknown"
"@

    # Inject owner context if available
    if ($OwnerDomains.Count -gt 0) {
        $domainList = $OwnerDomains -join ', '
        $systemPrompt += "`n`nOWNER COMPANY DOMAINS (classify as internal):`n$domainList`nAny participant whose email domain matches one of these domains is INTERNAL — they work for the lead-mining company."
    }
    if ($OwnerProfile) {
        $systemPrompt += "`n`nOWNER COMPANY CONTEXT:`n- Company: $($OwnerProfile.name)`n- Industry: $($OwnerProfile.industry)"
        if ($OwnerProfile.aliases) {
            $aliasNames = ($OwnerProfile.aliases | ForEach-Object { $_.name }) -join ', '
            $systemPrompt += "`n- Also known as: $aliasNames"
        }
    }

    # A generic list avoids the O(n^2) re-allocation of '$array += item'.
    $errors = New-Object 'System.Collections.Generic.List[object]'
    $jsonlPath = Join-Path $OutputPath 'email-analyses.jsonl'

    # Clear any existing JSONL from a previous partial run
    if (Test-Path -LiteralPath $jsonlPath) {
        Remove-Item -LiteralPath $jsonlPath -Force
    }

    $maxBodyChars = 8000   # truncate very long bodies to stay within token limits
    $position = 0          # 1-based position in the manifest, including skipped emails

    foreach ($email in $emails) {
        $position++

        # Skip emails that failed to parse; record them without calling the LLM.
        if ($email.parse_error) {
            $errorEntry = [ordered]@{
                file_path      = $email.file_path
                subject        = $null
                date           = $null
                participants   = @()
                opportunity    = $null
                sentiment      = $null
                analysis_error = "Email parse failed: $($email.parse_error)"
            }
            ($errorEntry | ConvertTo-Json -Depth 10 -Compress) |
                Add-Content -LiteralPath $jsonlPath -Encoding UTF8
            $errors.Add([ordered]@{ file = $email.file_path; error = "Skipped (parse error)" })
            continue
        }

        # Assemble user prompt
        $subject = $email.headers.subject
        $date = $email.headers.date
        $from = "$($email.headers.from.name) <$($email.headers.from.email)>"
        $to = ($email.headers.to | ForEach-Object { "$($_.name) <$($_.email)>" }) -join ', '
        $cc = if ($email.headers.cc) {
            ($email.headers.cc | ForEach-Object { "$($_.name) <$($_.email)>" }) -join ', '
        } else {
            ''
        }

        $body = $email.body.plain_text
        if ($body -and $body.Length -gt $maxBodyChars) {
            $cut = $maxBodyChars
            # Do not cut between the two halves of a surrogate pair; a lone surrogate
            # is not valid text to send to the API.
            if ([char]::IsHighSurrogate($body[$cut - 1])) { $cut-- }
            $body = $body.Substring(0, $cut) + "`n`n[... truncated ...]"
        }

        $attachmentsList = if ($email.attachments -and $email.attachments.Count -gt 0) {
            ($email.attachments | ForEach-Object { $_.filename }) -join ', '
        } else {
            ''
        }

        $userPrompt = @"
Analyse this email thread and extract structured opportunity data.

## Email Metadata
- Subject: $subject
- Date: $date
- From: $from
- To: $to
$(if ($cc) { "- CC: $cc" })
$(if ($attachmentsList) { "- Attachments: $attachmentsList" })

## Email Body
$body

## Required Output (JSON)
Return a JSON object with this exact structure:
{
  "participants": [
    {
      "name": "Full Name",
      "email": "email@example.com",
      "role": "Job Title or Unknown",
      "organisation": "Company Name or Unknown",
      "type": "internal|external"
    }
  ],
  "opportunity": {
    "category": ["Sales/BD"],
    "summary": "2-3 sentence summary of the opportunity and context",
    "stage": "initial|active|stalled|near-close"
  },
  "sentiment": {
    "overall_tone": "Warm|Neutral|Cool",
    "engagement_level": "Active mutual|One-sided|Passive",
    "conclusion": "follow-up planned|stalled|declined|near-close",
    "positive_signals": ["signal 1"],
    "negative_signals": []
  }
}
"@

        try {
            Write-Host " [$position/$totalEmails] $($email.file_name)..." -NoNewline -ForegroundColor Gray

            $aiParams = @{
                SystemPrompt = $systemPrompt
                UserPrompt   = $userPrompt
                JsonMode     = $true
                MaxTokens    = 1200
                Temperature  = 0.1
            }
            if ($Model) { $aiParams.Model = $Model }
            if ($Provider) { $aiParams.Provider = $Provider }

            # Use cached variant if cache path provided
            if ($CachePath) {
                $cacheKey = ($email.file_name -replace '[^\w\-\.]', '_') -replace '\.eml$|\.emltpl$', ''
                $aiParams.CachePath = $CachePath
                $aiParams.CacheKey = $cacheKey
                $response = Invoke-PCCompletionCached @aiParams
            } else {
                $response = Invoke-PCCompletion @aiParams
            }

            if (-not $response) {
                throw 'Empty response from AI'
            }

            # Parse the JSON response (with repair for common LLM issues)
            try {
                $parsed = Repair-JsonResponse -Response $response
            } catch {
                # Retry once on parse failure
                Write-Host " ↻" -NoNewline -ForegroundColor Yellow
                if ($CachePath) {
                    # Bust cache for retry so the unparseable response is not served again.
                    # NOTE(review): assumes the cache stores entries as '<CacheKey>.json'
                    # directly under CachePath — confirm against Invoke-PCCompletionCached.
                    $cacheBust = Join-Path $CachePath "$cacheKey.json"
                    if (Test-Path -LiteralPath $cacheBust) {
                        Remove-Item -LiteralPath $cacheBust -Force
                    }
                    $response = Invoke-PCCompletionCached @aiParams
                } else {
                    $response = Invoke-PCCompletion @aiParams
                }
                if (-not $response) {
                    throw 'Empty response from AI (retry)'
                }
                # A second parse failure propagates to the outer catch and is recorded
                # as an analysis error for this email.
                $parsed = Repair-JsonResponse -Response $response
            }

            # Deterministic owner domain override: do not rely on the model to honour
            # the OWNER COMPANY DOMAINS instruction.
            if ($OwnerDomains.Count -gt 0 -and $parsed.participants) {
                foreach ($participant in $parsed.participants) {
                    if (-not $participant.email) { continue }

                    # Take the text after the last '@'; strip trailing whitespace or a
                    # stray '>' in case the model returned 'Name <user@domain>'.
                    $emailDomain = ((([string]$participant.email) -split '@')[-1] -replace '[\s>]+$', '').ToLowerInvariant()
                    if ($emailDomain -notin $OwnerDomains) { continue }

                    # Set 'type' without throwing when the model omitted the property.
                    if ($participant -is [System.Collections.IDictionary]) {
                        $participant['type'] = 'internal'
                    } elseif ($participant.PSObject.Properties['type']) {
                        $participant.type = 'internal'
                    } else {
                        $participant | Add-Member -NotePropertyName 'type' -NotePropertyValue 'internal' -Force
                    }
                }
            }

            $successEntry = [ordered]@{
                file_path      = $email.file_path
                subject        = $subject
                date           = $date
                participants   = $parsed.participants
                opportunity    = $parsed.opportunity
                sentiment      = $parsed.sentiment
                analysis_error = $null
            }

            # Write incrementally to JSONL so a crash mid-run does not lose completed work
            ($successEntry | ConvertTo-Json -Depth 10 -Compress) |
                Add-Content -LiteralPath $jsonlPath -Encoding UTF8

            Write-Host " ✓" -ForegroundColor Green
        } catch {
            $errorEntry = [ordered]@{
                file_path      = $email.file_path
                subject        = $subject
                date           = $date
                participants   = @()
                opportunity    = $null
                sentiment      = $null
                analysis_error = $_.Exception.Message
            }
            ($errorEntry | ConvertTo-Json -Depth 10 -Compress) |
                Add-Content -LiteralPath $jsonlPath -Encoding UTF8
            $errors.Add([ordered]@{ file = $email.file_path; error = $_.Exception.Message })
            Write-Host " ✗ $($_.Exception.Message)" -ForegroundColor Red
        }

        # Rate limiting: pause between calls, but not after the final email. Uses the
        # manifest position (not the count of non-skipped emails) so skipped emails do
        # not cause a pointless trailing sleep.
        if ($position -lt $totalEmails -and $DelayMs -gt 0) {
            Start-Sleep -Milliseconds $DelayMs
        }
    }

    # Assemble final JSON from JSONL
    $analyses = @()
    if (Test-Path -LiteralPath $jsonlPath) {
        $analyses = @(
            Get-Content -LiteralPath $jsonlPath -Encoding UTF8 |
                Where-Object { $_.Trim() } |
                ForEach-Object { $_ | ConvertFrom-Json }
        )
    }

    # @() guarantees a real array: without it, a single matching PSCustomObject has no
    # reliable .Count on Windows PowerShell 5.1 and total_analysed would come out empty.
    $successful = @($analyses | Where-Object { $null -eq $_.analysis_error })

    $output = [ordered]@{
        metadata = [ordered]@{
            generated_at   = (Get-Date).ToString('o')
            model          = if ($Model) { $Model } else { 'provider-default' }
            total_analysed = $successful.Count
            total_errors   = $errors.Count
        }
        analyses = $analyses
    }

    $outputFile = Join-Path $OutputPath 'email-analyses.json'
    $output | ConvertTo-Json -Depth 10 | Set-Content -LiteralPath $outputFile -Encoding UTF8

    # Clean up JSONL
    if (Test-Path -LiteralPath $jsonlPath) {
        Remove-Item -LiteralPath $jsonlPath -Force
    }

    # Write errors separately if any. -InputObject keeps the file a JSON array even when
    # there is exactly one error (piping a single item would emit a bare object).
    if ($errors.Count -gt 0) {
        $errorsFile = Join-Path $OutputPath 'errors.json'
        ConvertTo-Json -InputObject $errors.ToArray() -Depth 5 |
            Set-Content -LiteralPath $errorsFile -Encoding UTF8
    }

    # Return summary
    [PSCustomObject]@{
        total_analysed = $output.metadata.total_analysed
        total_errors   = $errors.Count
        output_path    = $outputFile
    }
}