tool/Analysis/functions/Invoke-DeepResearch.ps1
|
function Invoke-DeepResearch {
    <#
    .SYNOPSIS
        Deep web-search research using GPT-5.5-pro on triage-selected contacts.
    .DESCRIPTION
        Reads contact-triage.json to identify contacts selected for deep research, then
        calls Invoke-PCCompletion / Invoke-PCCompletionCached with WebSearch enabled to
        find current information about each selected contact.

        Deep research (Pass 2) results are merged with Pass 1 results by
        Merge-DeepResearch: a Pass 2 value wins whenever it is present and not
        "Unknown"; otherwise the Pass 1 value is kept. Contacts that were not selected,
        or whose deep research failed, keep their Pass 1 research unchanged.

        Final merged results are written to contact-research.json.
    .PARAMETER TriagePath
        Path to contact-triage.json from the triage stage.
    .PARAMETER Pass1Path
        Path to contact-research-pass1.json for merging.
    .PARAMETER EnrichmentPath
        Optional path to contact-enrichment.json for additional prompt context.
    .PARAMETER OutputPath
        Path to data/ directory where contact-research.json will be written.
        The directory is created if it does not exist.
    .PARAMETER Provider
        AI provider passed to the completion function. Default: openai.
    .PARAMETER Model
        Model to use for deep research. When omitted, the completion function's own
        default is used and results are labelled gpt-5.5-pro.
    .PARAMETER MaxTokens
        Maximum tokens for each completion. Default: 8192 (also used when 0 is passed).
    .PARAMETER Temperature
        Sampling temperature. Default: 0.3. An explicit 0 is honoured.
    .PARAMETER CachePath
        Directory for response caching. When set, the cached completion function is used.
    .PARAMETER DelayMs
        Delay between API calls in milliseconds. Default: 2000 (GPT-5.5-pro is slow).
    .PARAMETER MaxErrors
        Circuit breaker: stop after this many consecutive errors. Default: 3.
    .PARAMETER TimeoutSeconds
        Timeout for each deep research call. Default: 300 (5 min, GPT-5.5-pro can be slow).
    .OUTPUTS
        PSCustomObject with total_contacts, deep_researched, pass1_only, total_errors,
        output_path.
    #>
    [CmdletBinding()]
    param(
        [Parameter(Mandatory)]
        [string]$TriagePath,

        [Parameter(Mandatory)]
        [string]$Pass1Path,

        [string]$EnrichmentPath,

        [Parameter(Mandatory)]
        [string]$OutputPath,

        [string]$Provider,
        [string]$Model,
        [int]$MaxTokens,
        [double]$Temperature,
        [string]$CachePath,
        [int]$DelayMs = 2000,
        [int]$MaxErrors = 3,
        [int]$TimeoutSeconds = 300
    )

    # Load inputs
    $triage = Get-Content $TriagePath -Raw | ConvertFrom-Json
    $pass1  = Get-Content $Pass1Path -Raw | ConvertFrom-Json

    # Normalise the contact list once; a missing "contacts" property becomes an empty list.
    $allContacts = @()
    if ($pass1.contacts) { $allContacts = @($pass1.contacts) }

    # Build lookup of pass1 results by (lower-cased) email
    $pass1Lookup = @{}
    foreach ($contact in $allContacts) {
        if ($contact.email) {
            $pass1Lookup["$($contact.email)".ToLowerInvariant()] = $contact
        }
    }

    # Load enrichment for context
    $enrichment = $null
    if ($EnrichmentPath -and (Test-Path $EnrichmentPath)) {
        $enrichment = Get-Content $EnrichmentPath -Raw | ConvertFrom-Json
    }

    # Get selected contacts (triage order is preserved). Entries without an email are
    # ignored and duplicates are dropped so no contact is researched (and billed) twice.
    $selectedEmails = @(
        $triage.selected |
            Where-Object { $_.selected -and $_.email } |
            ForEach-Object { "$($_.email)".ToLowerInvariant() } |
            Select-Object -Unique
    )
    Write-Host " $($selectedEmails.Count) contacts selected for deep research" -ForegroundColor Gray

    # Resolve effective settings once. $PSBoundParameters is used for Temperature so that
    # an explicit 0 is not mistaken for "not supplied".
    $effectiveMaxTokens   = if ($MaxTokens -gt 0) { $MaxTokens } else { 8192 }
    $effectiveTemperature = if ($PSBoundParameters.ContainsKey('Temperature')) { $Temperature } else { 0.3 }
    $effectiveProvider    = if ($Provider) { $Provider } else { 'openai' }
    # Label recorded in the output; falls back to the documented default model name.
    $modelLabel           = if ($Model) { $Model } else { 'gpt-5.5-pro' }

    $systemPrompt = @"
You are a professional research analyst performing deep web research on business contacts.
You have access to web search — USE IT to find current, verified information.

CRITICAL RULES:
- Return ONLY a valid JSON object — no markdown fences, no preamble
- Search for the person by name + company + industry context
- Verify information from multiple sources when possible
- Include URLs of sources you found
- If you find the person on LinkedIn, note their current title and company
- Look for recent news, conference appearances, company announcements
- Check if their company is active in telematics, connected cars, fleet management, IoT, or mobility

CONFIDENCE GUIDE:
- High: Found on LinkedIn + company website, or multiple independent sources
- Medium: Found on one source (LinkedIn OR company page OR news article)
- Low: Inferred from indirect sources or partial matches only
"@

    $deepResults       = @{}
    $deepCount         = 0
    $consecutiveErrors = 0   # drives the circuit breaker; reset on every success
    $totalErrors       = 0   # reported in the output; never reset

    foreach ($email in $selectedEmails) {
        $deepCount++

        if ($consecutiveErrors -ge $MaxErrors) {
            Write-Host " Circuit breaker: $MaxErrors consecutive errors, stopping deep research" -ForegroundColor Red
            break
        }

        $pass1Contact = $pass1Lookup[$email]
        if (-not $pass1Contact) {
            Write-Host " [$deepCount/$($selectedEmails.Count)] $email — not found in Pass 1, skipping" -ForegroundColor Yellow
            continue
        }

        # Build enrichment context: short snippets of successfully fetched company pages,
        # only for contacts on a non-free-provider domain.
        $enrichmentContext = ""
        if ($enrichment) {
            $contactEnrich = $null
            if ($enrichment.contacts.PSObject.Properties.Name -contains $email) {
                $contactEnrich = $enrichment.contacts.$email
            }

            if ($contactEnrich -and -not $contactEnrich.is_free_provider) {
                $domain = $contactEnrich.domain
                if ($enrichment.domains.PSObject.Properties.Name -contains $domain) {
                    $domainInfo = $enrichment.domains.$domain
                    if ($domainInfo.pages) {
                        $successPages = @($domainInfo.pages.PSObject.Properties |
                            Where-Object { $_.Value.success -eq $true })

                        if ($successPages.Count -gt 0) {
                            $snippets = ($successPages | ForEach-Object {
                                $content = $_.Value.content
                                # Keep the prompt small: at most 200 characters per page.
                                if ($content -and $content.Length -gt 200) {
                                    $content = $content.Substring(0, 200) + '...'
                                }
                                "$($_.Name): $content"
                            }) -join "`n"

                            $enrichmentContext = @"
## Company Website Content (from $domain)
$snippets
"@
                        }
                    }
                }
            }
        }

        # Pass 1 context
        $pass1Context = ""
        if ($pass1Contact.research) {
            $r = $pass1Contact.research
            $pass1Context = @"
## Pass 1 Research (to verify or improve)
- Current title: $($r.current_title)
- Current organisation: $($r.current_organisation)
- Still in role: $($r.still_in_role)
- Telematics activity: found=$($r.telematics_activity.found), summary="$($r.telematics_activity.summary)"
- LinkedIn: $($r.linkedin_active)
- Pass 1 confidence: $($r.confidence)
"@
        }

        $userPrompt = @"
Deep research this business contact using web search:

## Contact
- Name: $($pass1Contact.name)
- Email: $($pass1Contact.email)
- Role at time of contact: $($pass1Contact.original_role)
- Organisation at time of contact: $($pass1Contact.original_organisation)
- Date of email exchange: $($pass1Contact.email_date)
$pass1Context
$enrichmentContext

## Research Tasks
1. Search the web for "$($pass1Contact.name)" + "$($pass1Contact.original_organisation)"
2. Find their CURRENT job title and employer (check LinkedIn, company websites, news)
3. Determine if they are still at $($pass1Contact.original_organisation)
4. Look for involvement in telematics, connected car, UBI, fleet management, IoT, or mobility
5. Check LinkedIn profile activity
6. Provide source URLs for your findings

## Required Output (JSON)
{
  "current_title": "Current Job Title or Unknown",
  "current_organisation": "Current Employer or Unknown",
  "still_in_role": "Yes|No|Unknown",
  "telematics_activity": {
    "found": true/false,
    "summary": "Brief description of any telematics/IoT activity found",
    "since_email_date": true/false
  },
  "linkedin_active": "Active|Inactive|Unknown|No Profile Found",
  "confidence": "High|Medium|Low",
  "source_urls": ["url1", "url2"]
}
"@

        Write-Host " [$deepCount/$($selectedEmails.Count)] $($pass1Contact.name) ($email)" -NoNewline -ForegroundColor White

        try {
            $aiParams = @{
                SystemPrompt = $systemPrompt
                UserPrompt   = $userPrompt
                JsonMode     = $true
                MaxTokens    = $effectiveMaxTokens
                Temperature  = $effectiveTemperature
                Provider     = $effectiveProvider
                WebSearch    = $true
                TimeoutSec   = $TimeoutSeconds
            }
            if ($Model) { $aiParams.Model = $Model }

            if ($CachePath) {
                # Cache key is the email with anything outside [A-Za-z0-9_.-] replaced by "_".
                $cacheKey = ($email -replace '[^\w\-\.]', '_')
                $aiParams.CachePath = $CachePath
                $aiParams.CacheKey  = $cacheKey
                $response = Invoke-PCCompletionCached @aiParams
            }
            else {
                $response = Invoke-PCCompletion @aiParams
            }

            if (-not $response) { throw 'Empty response from AI' }

            # Sanitize and parse (using shared repair function)
            $parsed = Repair-JsonResponse -Response $response
            # A null parse result would otherwise be counted as a success and then
            # silently dropped during the merge.
            if (-not $parsed) { throw 'AI response could not be parsed as JSON' }

            $deepResults[$email] = $parsed
            $consecutiveErrors = 0
            Write-Host " ✓ ($($parsed.confidence))" -ForegroundColor Green
        }
        catch {
            $consecutiveErrors++
            $totalErrors++
            Write-Host " ✗ $($_.Exception.Message)" -ForegroundColor Red
        }

        if ($deepCount -lt $selectedEmails.Count) {
            Start-Sleep -Milliseconds $DelayMs
        }
    }

    # --- Merge Pass 1 + Pass 2 into final research ---
    Write-Host " Merging Pass 1 + Pass 2 results..." -ForegroundColor Gray

    $finalContacts = New-Object 'System.Collections.Generic.List[object]'
    $deepResearchedCount = 0

    foreach ($contact in $allContacts) {
        # Contacts without an email can never have a Pass 2 result; keep them as pass1-only
        # instead of failing on a null method call.
        $deep = $null
        if ($contact.email) {
            $deep = $deepResults["$($contact.email)".ToLowerInvariant()]
        }

        if ($deep) {
            $deepResearchedCount++
            # Merge: Pass 2 overrides "Unknown" fields from Pass 1
            $merged = Merge-DeepResearch -Pass1 $contact.research -Pass2 $deep -Model $modelLabel
            $finalContacts.Add([ordered]@{
                name                  = $contact.name
                email                 = $contact.email
                original_role         = $contact.original_role
                original_organisation = $contact.original_organisation
                email_date            = $contact.email_date
                research              = $merged
                research_source       = 'pass1+pass2'
                research_error        = $null
            })
        }
        else {
            # Pass 1 only
            $finalContacts.Add([ordered]@{
                name                  = $contact.name
                email                 = $contact.email
                original_role         = $contact.original_role
                original_organisation = $contact.original_organisation
                email_date            = $contact.email_date
                research              = $contact.research
                research_source       = 'pass1'
                research_error        = $contact.research_error
            })
        }
    }

    $totalContacts = $allContacts.Count
    $pass1OnlyCount = $totalContacts - $deepResearchedCount

    # Write final output
    $output = [ordered]@{
        metadata = [ordered]@{
            generated_at    = (Get-Date).ToString('o')
            total_contacts  = $totalContacts
            deep_researched = $deepResearchedCount
            pass1_only      = $pass1OnlyCount
            total_errors    = $totalErrors
            model           = $modelLabel
        }
        contacts = $finalContacts.ToArray()
    }

    # Make sure the target directory exists so the results of the (slow, paid) research
    # are not lost at the very last step. Out-Null keeps the function output clean.
    if (-not (Test-Path $OutputPath)) {
        New-Item -ItemType Directory -Path $OutputPath -Force | Out-Null
    }

    $outputFile = Join-Path $OutputPath 'contact-research.json'
    $output | ConvertTo-Json -Depth 10 | Set-Content -Path $outputFile -Encoding UTF8

    Write-Host " Final research: $deepResearchedCount deep + $pass1OnlyCount pass1-only" -ForegroundColor Green

    [PSCustomObject]@{
        total_contacts  = $totalContacts
        deep_researched = $deepResearchedCount
        pass1_only      = $pass1OnlyCount
        total_errors    = $totalErrors
        output_path     = $outputFile
    }
}

function Merge-DeepResearch {
    <#
    .SYNOPSIS
        Merge Pass 2 deep research into Pass 1 results.
    .DESCRIPTION
        For the text fields (current_title, current_organisation, still_in_role,
        linkedin_active) the Pass 2 value wins whenever it is non-empty and not
        "Unknown"; otherwise the Pass 1 value is used; otherwise "Unknown".

        Telematics flags are OR-ed across both passes, the Pass 2 summary is preferred,
        source URLs are unioned, and the higher of the two confidence levels is kept
        (Pass 2 on a tie; "Low" when neither is recognised).
    .PARAMETER Pass1
        Pass 1 research object. May be $null.
    .PARAMETER Pass2
        Pass 2 (deep research) object parsed from the model response.
    .PARAMETER Model
        Model name recorded in deep_research_model. Default: gpt-5.5-pro.
    .OUTPUTS
        Ordered dictionary with the merged research fields.
    #>
    param(
        [object]$Pass1,
        [object]$Pass2,
        [string]$Model = 'gpt-5.5-pro'
    )

    if (-not $Pass1) { $Pass1 = [PSCustomObject]@{} }
    if (-not $Model) { $Model = 'gpt-5.5-pro' }

    # Helper: pick best value (Pass 2 wins if non-Unknown)
    function Select-BestValue {
        param([string]$P1Val, [string]$P2Val)
        if ($P2Val -and $P2Val -ne 'Unknown') { return $P2Val }
        if ($P1Val -and $P1Val -ne 'Unknown') { return $P1Val }
        return 'Unknown'
    }

    # Helper: convert a JSON-ish flag to a boolean. A plain [bool] cast turns every
    # non-empty string into $true -- including "false" -- so falsy strings that a
    # model may emit are handled explicitly before falling back to the cast.
    function ConvertTo-Flag {
        param([object]$Value)
        if ($null -eq $Value) { return $false }
        if ($Value -is [bool]) { return $Value }
        if ($Value -is [string]) {
            if ($Value.Trim() -match '^(false|no|0|null|none|unknown|n/a)?$') { return $false }
        }
        return [bool]$Value
    }

    # Merge source URLs (union of both passes, empty entries removed)
    $p1Urls = @()
    if ($Pass1.source_urls) { $p1Urls = @($Pass1.source_urls) }
    $p2Urls = @()
    if ($Pass2.source_urls) { $p2Urls = @($Pass2.source_urls) }
    $allUrls = @($p1Urls + $p2Urls | Where-Object { $_ } | Sort-Object -Unique)

    # Merge telematics: a positive finding in either pass is kept
    $telFound = (ConvertTo-Flag $Pass2.telematics_activity.found) -or
                (ConvertTo-Flag $Pass1.telematics_activity.found)
    $telSince = (ConvertTo-Flag $Pass2.telematics_activity.since_email_date) -or
                (ConvertTo-Flag $Pass1.telematics_activity.since_email_date)
    $telSummary = ''
    if ($Pass2.telematics_activity.summary) {
        $telSummary = $Pass2.telematics_activity.summary
    }
    elseif ($Pass1.telematics_activity.summary) {
        $telSummary = $Pass1.telematics_activity.summary
    }

    # Confidence: keep the higher level and emit the canonical spelling. Hashtable
    # literals are case-insensitive, so "high" and "High" rank the same.
    $confLevels = @{ 'High' = 3; 'Medium' = 2; 'Low' = 1 }
    $confNames  = @('Low', 'Low', 'Medium', 'High')   # index 0 = neither pass recognised
    $p1ConfStr  = "$($Pass1.confidence)".Trim()
    $p2ConfStr  = "$($Pass2.confidence)".Trim()
    $p1Conf = if ($confLevels.ContainsKey($p1ConfStr)) { $confLevels[$p1ConfStr] } else { 0 }
    $p2Conf = if ($confLevels.ContainsKey($p2ConfStr)) { $confLevels[$p2ConfStr] } else { 0 }
    $finalConf = $confNames[[Math]::Max($p1Conf, $p2Conf)]

    return [ordered]@{
        current_title        = Select-BestValue -P1Val "$($Pass1.current_title)" -P2Val "$($Pass2.current_title)"
        current_organisation = Select-BestValue -P1Val "$($Pass1.current_organisation)" -P2Val "$($Pass2.current_organisation)"
        still_in_role        = Select-BestValue -P1Val "$($Pass1.still_in_role)" -P2Val "$($Pass2.still_in_role)"
        telematics_activity  = [ordered]@{
            found            = $telFound
            summary          = $telSummary
            since_email_date = $telSince
        }
        linkedin_active      = Select-BestValue -P1Val "$($Pass1.linkedin_active)" -P2Val "$($Pass2.linkedin_active)"
        confidence           = $finalConf
        source_urls          = $allUrls
        research_method      = 'two-pass'
        # @(...) guarantees an array; an if/else expression yielding @() would collapse to $null.
        pass1_providers      = @(if ($Pass1.providers_used) { $Pass1.providers_used })
        deep_research_model  = $Model
        researched_at        = (Get-Date).ToString('o')
    }
}