tool/Analysis/functions/Invoke-ResearchContacts.ps1
|
function Get-ProviderSystemPrompt {
    <#
    .SYNOPSIS
        Returns a provider-specific system prompt for contact research.
    .DESCRIPTION
        Builds a base prompt (JSON-only output rules), optionally appends context
        taken from the owner profile, then appends provider-specific guidance
        bullets for 'anthropic', 'openai' or 'gemini'. Any other provider name
        receives the base prompt (plus owner context) unchanged.
    .PARAMETER Provider
        Provider name. Matched case-insensitively by the switch statement.
    .PARAMETER OwnerProfile
        Optional object; its name, industry, verticals and products properties
        are interpolated into the prompt when present.
    .OUTPUTS
        System.String
    #>
    param(
        [string]$Provider,
        [PSCustomObject]$OwnerProfile
    )

    $base = @"
You are a professional research analyst verifying the current status of business contacts.

CRITICAL RULES:
- Return ONLY a valid JSON object — no markdown fences, no preamble, no explanation
- Do NOT simulate tool calls, web searches, or any other actions
- Do NOT wrap output in <tool_call> or any XML-like tags
- Your ENTIRE response must be parseable as a single JSON object
- If you cannot determine something, use "Unknown"
"@

    if ($OwnerProfile) {
        $base += "`n`nCONTEXT — LEAD-MINING COMPANY:`nThese contacts are being researched on behalf of $($OwnerProfile.name).`nIndustry: $($OwnerProfile.industry)"
        if ($OwnerProfile.verticals) {
            $base += "`nTarget verticals: $($OwnerProfile.verticals -join '; ')"
        }
        if ($OwnerProfile.products) {
            $base += "`nProducts: $($OwnerProfile.products -join ', ')"
        }
        $base += "`n`nWhen assessing telematics/industry activity, consider whether the contact's organisation operates in verticals that align with the lead-mining company's offerings."
    }

    switch ($Provider) {
        'anthropic' {
            return @"
$base
- Use your training knowledge to assess the contact's current status
- Be conservative with confidence ratings — only rate Medium or High when you have strong reasons
- Focus on reasoning from known facts about the organisation and industry
"@
        }
        'openai' {
            return @"
$base
- Use your broad knowledge to find current information about this person
- Pay attention to company mergers, acquisitions, and rebranding since the email date
- Cross-reference the person's name with known industry events, conferences, and publications
- Be specific about your sources of confidence
"@
        }
        'gemini' {
            return @"
$base
- Leverage your knowledge of public company data, news, and professional networks
- Check if the organisation has changed names, been acquired, or expanded since the email date
- Look for the person in the context of industry directories, conference speakers, and published research
- Provide specific details when available
"@
        }
        default {
            return $base
        }
    }
}

function Invoke-SingleProviderResearch {
    <#
    .SYNOPSIS
        Calls a single AI provider for contact research and returns the parsed result.
    .DESCRIPTION
        Builds the system prompt for the provider, calls Invoke-PCCompletion (or
        Invoke-PCCompletionCached when a cache path is supplied) and passes the
        response through Repair-JsonResponse.
    .PARAMETER Contact
        Dictionary describing the contact; only its 'email' entry is read here
        (to derive the cache key). Typed as IDictionary so both plain hashtables
        and the ordered dictionaries built by Invoke-ResearchContacts bind
        without relying on an implicit OrderedDictionary-to-Hashtable conversion.
    .PARAMETER Provider
        Provider name forwarded to the completion function.
    .PARAMETER UserPrompt
        The user prompt text to send.
    .PARAMETER Model
        Optional model override; omitted from the call when empty.
    .PARAMETER CachePath
        Optional cache root. When set, responses are cached under
        <CachePath>/<Provider> keyed by a sanitised form of the email address.
    .PARAMETER OwnerProfile
        Optional owner profile forwarded to Get-ProviderSystemPrompt.
    .OUTPUTS
        Whatever Repair-JsonResponse returns for the provider's response.
    #>
    param(
        [Parameter(Mandatory)][System.Collections.IDictionary]$Contact,
        [Parameter(Mandatory)][string]$Provider,
        [Parameter(Mandatory)][string]$UserPrompt,
        [string]$Model,
        [string]$CachePath,
        [PSCustomObject]$OwnerProfile
    )

    $systemPrompt = Get-ProviderSystemPrompt -Provider $Provider -OwnerProfile $OwnerProfile

    $aiParams = @{
        SystemPrompt = $systemPrompt
        UserPrompt   = $UserPrompt
        JsonMode     = $true
        MaxTokens    = 8192
        Temperature  = 0.2
        Provider     = $Provider
    }
    if ($Model) { $aiParams.Model = $Model }

    if ($CachePath) {
        # One cache sub-directory per provider; the key is the email with every
        # character outside [A-Za-z0-9_.-] replaced by an underscore.
        $aiParams.CachePath = Join-Path $CachePath $Provider
        $aiParams.CacheKey  = ($Contact.email -replace '[^\w\-\.]', '_')
        $response = Invoke-PCCompletionCached @aiParams
    }
    else {
        $response = Invoke-PCCompletion @aiParams
    }

    if (-not $response) { throw 'Empty response from AI' }

    # Sanitize and parse (using shared repair function)
    return (Repair-JsonResponse -Response $response)
}

function Merge-ProviderResearch {
    <#
    .SYNOPSIS
        Merges research results from multiple providers using union + conflict flagging.
    .DESCRIPTION
        For each scalar field, picks a non-"Unknown" value. When providers disagree,
        the value reported by the most providers wins (ties are broken by value
        name so the result is deterministic) and the disagreement is reported.
        Telematics findings and source URLs are unioned. Confidence starts at the
        highest level any provider reported and is upgraded when two or more
        providers corroborate organisation (Medium) or organisation and title (High).
    .PARAMETER ProviderResults
        Hashtable mapping provider name to that provider's parsed JSON object.
    .OUTPUTS
        Ordered dictionary with the merged fields, providers_used,
        provider_agreement, conflicts and a researched_at timestamp.
    #>
    param(
        [Parameter(Mandatory)]
        [hashtable]$ProviderResults
    )

    # Hashtable key order is not stable; sort so summaries, sources and
    # providers_used come out the same way on every run.
    $providers = @($ProviderResults.Keys | Sort-Object)

    # Helper: pick the best non-Unknown value for a string field.
    # Reads $providers / $ProviderResults from the enclosing scope.
    function Select-BestValue {
        param([string]$FieldName, [string]$Default = 'Unknown')

        $values = [ordered]@{}
        foreach ($p in $providers) {
            $val = $ProviderResults[$p].$FieldName
            if ($val -and $val -ne 'Unknown' -and $val -ne '') {
                $values[$p] = $val
            }
        }

        if ($values.Count -eq 0) {
            return @{ value = $Default; sources = @(); conflict = $false }
        }
        if ($values.Count -eq 1) {
            $src = @($values.Keys)[0]
            return @{ value = $values[$src]; sources = @($src); conflict = $false }
        }

        # Multiple providers have values — check for agreement
        $uniqueValues = @($values.Values | Sort-Object -Unique)
        if ($uniqueValues.Count -eq 1) {
            return @{ value = $uniqueValues[0]; sources = @($values.Keys); conflict = $false }
        }

        # Conflict — most corroborated value first; equal counts fall back to
        # the value name so the winner does not depend on sort stability.
        $grouped = @(
            $values.GetEnumerator() |
                Group-Object -Property Value |
                Sort-Object -Property @{ Expression = 'Count'; Descending = $true },
                                      @{ Expression = 'Name'; Descending = $false }
        )
        $best = $grouped[0]
        return @{
            value        = $best.Name
            sources      = @($best.Group | ForEach-Object { $_.Key })
            conflict     = $true
            alternatives = @(
                $grouped |
                    Where-Object { $_.Name -ne $best.Name } |
                    ForEach-Object {
                        [ordered]@{
                            value     = $_.Name
                            providers = @($_.Group | ForEach-Object { $_.Key })
                        }
                    }
            )
        }
    }

    # Merge scalar fields
    $title    = Select-BestValue -FieldName 'current_title'
    $org      = Select-BestValue -FieldName 'current_organisation'
    $role     = Select-BestValue -FieldName 'still_in_role'
    $linkedin = Select-BestValue -FieldName 'linkedin_active'

    # Merge telematics_activity — union approach
    $telematicsFound      = $false
    $telematicsSinceEmail = $false
    $telematicsSummaries  = @()
    foreach ($p in $providers) {
        $ta = $ProviderResults[$p].telematics_activity
        if (-not $ta) { continue }

        if ($ta.found -eq $true) {
            $telematicsFound = $true
            if ($ta.since_email_date -eq $true) { $telematicsSinceEmail = $true }
        }
        if ($ta.summary -and $ta.summary -ne '' -and $ta.summary -ne 'Unknown') {
            $telematicsSummaries += "$($p): $($ta.summary)"
        }
    }

    # Merge source_urls — union all
    $allUrls = @()
    foreach ($p in $providers) {
        $urls = $ProviderResults[$p].source_urls
        if ($urls) { $allUrls += @($urls) }
    }
    $allUrls = @($allUrls | Where-Object { $_ } | Sort-Object -Unique)

    # Determine merged confidence. Missing or unrecognised values are skipped
    # explicitly (never used as a hashtable index), and the result is always
    # one of the canonical names regardless of the casing a provider used.
    $confidenceRank  = @{ 'High' = 3; 'Medium' = 2; 'Low' = 1 }
    $confidenceNames = @('Low', 'Medium', 'High')
    $maxRank = 1
    foreach ($p in $providers) {
        $conf = $ProviderResults[$p].confidence
        if ($conf -is [string]) {
            $confKey = $conf.Trim()
            if ($confidenceRank.ContainsKey($confKey) -and $confidenceRank[$confKey] -gt $maxRank) {
                $maxRank = $confidenceRank[$confKey]
            }
        }
    }
    $maxConfidence = $confidenceNames[$maxRank - 1]

    # Upgrade confidence if multiple providers corroborate key fields.
    # @() guarantees an array so .Count is the number of matching providers.
    $nonUnknownOrg = @($providers | Where-Object {
            $ProviderResults[$_].current_organisation -and
            $ProviderResults[$_].current_organisation -ne 'Unknown'
        }).Count
    $nonUnknownTitle = @($providers | Where-Object {
            $ProviderResults[$_].current_title -and
            $ProviderResults[$_].current_title -ne 'Unknown'
        }).Count

    if ($nonUnknownOrg -ge 2 -and -not $org.conflict) {
        if ($maxConfidence -eq 'Low') { $maxConfidence = 'Medium' }
    }
    if ($nonUnknownOrg -ge 2 -and $nonUnknownTitle -ge 2 -and -not $org.conflict -and -not $title.conflict) {
        $maxConfidence = 'High'
    }

    # Build conflicts array (title, organisation and still_in_role only)
    $conflicts = @()
    if ($title.conflict) {
        $conflicts += [ordered]@{ field = 'current_title'; chosen = $title.value; alternatives = $title.alternatives }
    }
    if ($org.conflict) {
        $conflicts += [ordered]@{ field = 'current_organisation'; chosen = $org.value; alternatives = $org.alternatives }
    }
    if ($role.conflict) {
        $conflicts += [ordered]@{ field = 'still_in_role'; chosen = $role.value; alternatives = $role.alternatives }
    }

    $summaryText = ''
    if ($telematicsSummaries.Count -gt 0) { $summaryText = $telematicsSummaries -join '; ' }

    return [ordered]@{
        current_title        = $title.value
        current_organisation = $org.value
        still_in_role        = $role.value
        telematics_activity  = [ordered]@{
            found            = $telematicsFound
            summary          = $summaryText
            since_email_date = $telematicsSinceEmail
        }
        linkedin_active      = $linkedin.value
        confidence           = $maxConfidence
        source_urls          = $allUrls
        research_method      = 'multi-provider'
        providers_used       = $providers
        provider_agreement   = [ordered]@{
            title_sources = $title.sources
            org_sources   = $org.sources
        }
        conflicts            = $conflicts
        researched_at        = (Get-Date).ToString('o')
    }
}

function New-ContactResearchRecord {
    <#
    .SYNOPSIS
        Builds one entry of the 'contacts' array written by Invoke-ResearchContacts.
    .PARAMETER Contact
        Contact dictionary (name, email, original_role, original_organisation, email_date).
    .PARAMETER Research
        Merged research result, or $null when research failed or was skipped.
    .PARAMETER ErrorMessage
        Failure description; stored as $null when empty.
    #>
    param(
        [Parameter(Mandatory)][System.Collections.IDictionary]$Contact,
        $Research = $null,
        [string]$ErrorMessage
    )

    # A [string] parameter turns $null into ''; map it back so successful
    # records keep research_error = $null.
    $researchError = $null
    if ($ErrorMessage) { $researchError = $ErrorMessage }

    [ordered]@{
        name                  = $Contact.name
        email                 = $Contact.email
        original_role         = $Contact.original_role
        original_organisation = $Contact.original_organisation
        email_date            = $Contact.email_date
        research              = $Research
        research_error        = $researchError
    }
}

function Invoke-ResearchContacts {
    <#
    .SYNOPSIS
        Multi-provider AI contact research with response merging.
    .DESCRIPTION
        Reads email-analyses.json, deduplicates external contacts, and for each
        unique contact calls multiple AI providers (Anthropic, OpenAI, Gemini) to
        research their current role, organisation, and telematics activity.

        When CachePath is given, each provider's response is cached in its own
        sub-directory: {CachePath}/{provider}/

        Results from all providers are merged using union + conflict flagging:
          - Most specific non-"Unknown" value wins per field
          - Multi-provider agreement upgrades confidence
          - Conflicts are flagged (not silently resolved)

        Merged results are written to OutputFileName inside OutputPath. Errors are
        appended to errors.json in the same directory, always as a JSON array.
    .PARAMETER AnalysesPath
        Path to email-analyses.json from the analyse stage.
    .PARAMETER OutputPath
        Directory where the results file will be written. Created if missing.
    .PARAMETER Model
        Override model (applies to all providers). Leave empty for provider defaults.
    .PARAMETER Providers
        Array of providers to query. Default: all three.
    .PARAMETER CachePath
        Directory for response caching. Each provider gets a subdirectory.
    .PARAMETER EnrichmentPath
        Accepted for interface compatibility; not referenced by this function.
    .PARAMETER OutputFileName
        Name of the results file. Default: contact-research.json.
    .PARAMETER OwnerProfile
        Optional owner profile forwarded to the system-prompt builder.
    .PARAMETER DelayMs
        Delay between contacts in milliseconds. Default: 800. The pause after each
        provider call is DelayMs divided by the provider count, but at least 200 ms.
    .PARAMETER MaxErrors
        Circuit breaker: stop after this many consecutive contacts for which every
        provider failed (or merging failed). Remaining contacts are recorded as
        skipped. Default: 5.
    .OUTPUTS
        PSCustomObject with total_contacts, total_researched, total_errors, output_path.
    #>
    [CmdletBinding()]
    param(
        [Parameter(Mandatory)]
        [string]$AnalysesPath,

        [Parameter(Mandatory)]
        [string]$OutputPath,

        [string]$Model,
        [string[]]$Providers = @('anthropic', 'openai', 'gemini'),
        [string]$CachePath,
        [string]$EnrichmentPath,
        [string]$OutputFileName = 'contact-research.json',
        [PSCustomObject]$OwnerProfile,
        [int]$DelayMs = 800,
        [int]$MaxErrors = 5
    )

    # Load analyses
    $analyses = Get-Content $AnalysesPath -Raw | ConvertFrom-Json

    # Deduplicate external contacts across all emails (first occurrence wins,
    # keyed by lower-cased email address).
    $contactMap = [ordered]@{}
    foreach ($analysis in $analyses.analyses) {
        if ($analysis.analysis_error) { continue }

        foreach ($participant in $analysis.participants) {
            if ($participant.type -ne 'external') { continue }
            if (-not $participant.email -or $participant.email -eq 'Unknown') { continue }

            $key = $participant.email.ToLowerInvariant()
            if ($contactMap.Contains($key)) { continue }

            $contactMap[$key] = [ordered]@{
                name                  = $participant.name
                email                 = $participant.email
                original_role         = $participant.role
                original_organisation = $participant.organisation
                email_date            = $analysis.date
                source_file           = $analysis.file_path
            }
        }
    }

    $contacts = @($contactMap.Values)
    Write-Host " Found $($contacts.Count) unique external contacts" -ForegroundColor Gray
    Write-Host " Providers: $($Providers -join ', ')" -ForegroundColor Gray

    # Lists instead of array += so the loop does not re-copy on every append.
    $results  = New-Object 'System.Collections.Generic.List[object]'
    $errorLog = New-Object 'System.Collections.Generic.List[object]'
    $consecutiveErrors = 0

    for ($index = 0; $index -lt $contacts.Count; $index++) {
        # Circuit breaker: record this and every remaining contact as skipped.
        if ($consecutiveErrors -ge $MaxErrors) {
            Write-Host " Circuit breaker: $MaxErrors consecutive errors, stopping research" -ForegroundColor Red
            for ($i = $index; $i -lt $contacts.Count; $i++) {
                $results.Add((New-ContactResearchRecord -Contact $contacts[$i] -ErrorMessage 'Skipped (circuit breaker)'))
                $errorLog.Add([ordered]@{ contact = $contacts[$i].email; error = 'Circuit breaker' })
            }
            break
        }

        $contact = $contacts[$index]
        $contactCount = $index + 1

        $userPrompt = @"
Research this business contact's current status:

## Contact Details
- Name: $($contact.name)
- Email: $($contact.email)
- Role at time of contact: $($contact.original_role)
- Organisation at time of contact: $($contact.original_organisation)
- Date of email exchange: $($contact.email_date)

## Research Tasks
1. Find their CURRENT job title and employer
2. Determine if they are still at $($contact.original_organisation)
3. Look for any involvement in telematics, connected car, UBI, fleet management, or mobility SINCE $($contact.email_date)
4. Check LinkedIn activity status
5. Assess confidence level of findings

## Required Output (JSON)
{
  "current_title": "Current Job Title or Unknown",
  "current_organisation": "Current Employer or Unknown",
  "still_in_role": "Yes|No|Unknown",
  "telematics_activity": {
    "found": true/false,
    "summary": "Brief description",
    "since_email_date": true/false
  },
  "linkedin_active": "Active|Inactive|Unknown|No Profile Found",
  "confidence": "High|Medium|Low",
  "source_urls": ["url1", "url2"]
}
"@

        Write-Host " [$contactCount/$($contacts.Count)] $($contact.name) ($($contact.email))" -NoNewline -ForegroundColor Gray

        # Query each provider; a failure of one provider does not stop the others.
        $providerResults = @{}
        $providerErrors  = @()
        foreach ($provider in $Providers) {
            $providerTag = $provider[0].ToString().ToUpper()
            try {
                $researchArgs = @{
                    Contact      = $contact
                    Provider     = $provider
                    UserPrompt   = $userPrompt
                    Model        = $Model
                    CachePath    = $CachePath
                    OwnerProfile = $OwnerProfile
                }
                $providerResults[$provider] = Invoke-SingleProviderResearch @researchArgs
                Write-Host " $($providerTag)✓" -NoNewline -ForegroundColor Green
            }
            catch {
                $providerErrors += [ordered]@{ provider = $provider; error = $_.Exception.Message }
                Write-Host " $($providerTag)✗" -NoNewline -ForegroundColor Red
            }

            # Rate limiting between provider calls
            Start-Sleep -Milliseconds ([math]::Max(200, $DelayMs / $Providers.Count))
        }

        # Merge or fail
        if ($providerResults.Count -gt 0) {
            try {
                $merged = Merge-ProviderResearch -ProviderResults $providerResults
                $merged['provider_errors'] = $providerErrors
                $results.Add((New-ContactResearchRecord -Contact $contact -Research $merged))
                $consecutiveErrors = 0
                Write-Host "" -ForegroundColor Green  # newline
            }
            catch {
                $consecutiveErrors++
                $mergeMessage = "Merge failed: $($_.Exception.Message)"
                $results.Add((New-ContactResearchRecord -Contact $contact -ErrorMessage $mergeMessage))
                $errorLog.Add([ordered]@{ contact = $contact.email; error = $mergeMessage })
                Write-Host " MERGE✗" -ForegroundColor Red
            }
        }
        else {
            $consecutiveErrors++
            $allErrors = ($providerErrors | ForEach-Object { "$($_.provider): $($_.error)" }) -join '; '
            $results.Add((New-ContactResearchRecord -Contact $contact -ErrorMessage "All providers failed: $allErrors"))
            $errorLog.Add([ordered]@{ contact = $contact.email; error = 'All providers failed' })
            Write-Host " ALL✗" -ForegroundColor Red
        }

        # Rate limiting between contacts
        if ($contactCount -lt $contacts.Count) {
            Start-Sleep -Milliseconds $DelayMs
        }
    }

    # @() is required: with exactly one match the pipeline yields a single
    # ordered dictionary, whose own .Count is its number of keys, not 1.
    $researchedCount = @($results | Where-Object { $null -eq $_.research_error }).Count

    # Write results
    $output = [ordered]@{
        metadata = [ordered]@{
            generated_at     = (Get-Date).ToString('o')
            providers        = $Providers
            total_contacts   = $contacts.Count
            total_researched = $researchedCount
            total_errors     = $errorLog.Count
        }
        contacts = $results.ToArray()
    }

    if (-not (Test-Path -LiteralPath $OutputPath -PathType Container)) {
        New-Item -ItemType Directory -Path $OutputPath -Force | Out-Null
    }

    $outputFile = Join-Path $OutputPath $OutputFileName
    $output | ConvertTo-Json -Depth 10 | Set-Content -Path $outputFile -Encoding UTF8

    if ($errorLog.Count -gt 0) {
        $errorsFile = Join-Path $OutputPath 'errors.json'
        $allErrorEntries = @()
        if (Test-Path $errorsFile) {
            # The file may hold a bare object or an array; normalise to an array
            # and drop the $null an empty file would produce.
            $existing = Get-Content $errorsFile -Raw | ConvertFrom-Json
            $allErrorEntries = @($existing | Where-Object { $null -ne $_ })
        }
        $allErrorEntries += $errorLog.ToArray()

        # -InputObject keeps a single entry serialised as a one-element array.
        ConvertTo-Json -InputObject $allErrorEntries -Depth 5 | Set-Content -Path $errorsFile -Encoding UTF8
    }

    [PSCustomObject]@{
        total_contacts   = $contacts.Count
        total_researched = $researchedCount
        total_errors     = $errorLog.Count
        output_path      = $outputFile
    }
}