# tool/Analysis/functions/Invoke-ResearchContacts.ps1

function Get-ProviderSystemPrompt {
    <#
    .SYNOPSIS
        Builds the system prompt for contact research, tailored to one AI provider.

    .DESCRIPTION
        Starts from a shared instruction block (JSON-only output rules). When
        -OwnerProfile is supplied, a context section describing the lead-mining
        company is appended. Finally, provider-specific guidance bullets are
        appended for 'anthropic', 'openai' or 'gemini'; any other provider name
        receives the shared prompt (plus context, if any) with no extra bullets.

    .PARAMETER Provider
        Provider name used to select the extra guidance bullets.

    .PARAMETER OwnerProfile
        Optional object whose name, industry, verticals and products properties
        are read to build the context section.

    .OUTPUTS
        System.String. The assembled system prompt.
    #>

    param(
        [string]$Provider,
        [PSCustomObject]$OwnerProfile
    )

    # Shared rules sent to every provider.
    $prompt = @"
You are a professional research analyst verifying the current status of business contacts.

CRITICAL RULES:
- Return ONLY a valid JSON object — no markdown fences, no preamble, no explanation
- Do NOT simulate tool calls, web searches, or any other actions
- Do NOT wrap output in <tool_call> or any XML-like tags
- Your ENTIRE response must be parseable as a single JSON object
- If you cannot determine something, use "Unknown"
"@

    if ($OwnerProfile) {
        # Collect the context lines first, then attach them in one step.
        $contextLines = @(
            'CONTEXT — LEAD-MINING COMPANY:'
            "These contacts are being researched on behalf of $($OwnerProfile.name)."
            "Industry: $($OwnerProfile.industry)"
        )
        if ($OwnerProfile.verticals) {
            $contextLines += "Target verticals: $($OwnerProfile.verticals -join '; ')"
        }
        if ($OwnerProfile.products) {
            $contextLines += "Products: $($OwnerProfile.products -join ', ')"
        }

        $closingNote = "When assessing telematics/industry activity, consider whether the contact's organisation operates in verticals that align with the lead-mining company's offerings."
        $prompt = $prompt + "`n`n" + ($contextLines -join "`n") + "`n`n" + $closingNote
    }

    # Provider-specific bullets; $null means "no extra guidance".
    $guidance = switch ($Provider) {
        'anthropic' {
            @"
- Use your training knowledge to assess the contact's current status
- Be conservative with confidence ratings — only rate Medium or High when you have strong reasons
- Focus on reasoning from known facts about the organisation and industry
"@
        }
        'openai' {
            @"
- Use your broad knowledge to find current information about this person
- Pay attention to company mergers, acquisitions, and rebranding since the email date
- Cross-reference the person's name with known industry events, conferences, and publications
- Be specific about your sources of confidence
"@
        }
        'gemini' {
            @"
- Leverage your knowledge of public company data, news, and professional networks
- Check if the organisation has changed names, been acquired, or expanded since the email date
- Look for the person in the context of industry directories, conference speakers, and published research
- Provide specific details when available
"@
        }
        default { $null }
    }

    if ($null -eq $guidance) {
        return $prompt
    }

    return @"
$prompt
$guidance
"@

}

function Invoke-SingleProviderResearch {
    <#
    .SYNOPSIS
        Calls a single AI provider for contact research and returns the parsed result.

    .DESCRIPTION
        Builds the provider-specific system prompt via Get-ProviderSystemPrompt,
        sends it together with -UserPrompt through Invoke-PCCompletion (or
        Invoke-PCCompletionCached when -CachePath is given), and passes the raw
        response to Repair-JsonResponse.

    .PARAMETER Contact
        Dictionary describing the contact; only its 'email' entry is read here
        (to derive the cache key). Any IDictionary is accepted, so both plain
        hashtables and [ordered] dictionaries bind.

    .PARAMETER Provider
        Provider name forwarded to the completion call and used as the cache
        subdirectory name.

    .PARAMETER UserPrompt
        The user prompt sent to the provider.

    .PARAMETER Model
        Optional model override; only forwarded when non-empty.

    .PARAMETER CachePath
        Optional cache root. When set, responses are cached under
        <CachePath>/<Provider> keyed by the sanitised contact email.

    .PARAMETER OwnerProfile
        Optional owner profile forwarded to Get-ProviderSystemPrompt.

    .OUTPUTS
        Whatever Repair-JsonResponse returns for the provider's response.
    #>

    param(
        # IDictionary rather than [hashtable]: the caller in this file builds
        # contacts with [ordered]@{}, which is an OrderedDictionary and does
        # not convert to Hashtable during parameter binding.
        [Parameter(Mandatory)][System.Collections.IDictionary]$Contact,
        [Parameter(Mandatory)][string]$Provider,
        [Parameter(Mandatory)][string]$UserPrompt,
        [string]$Model,
        [string]$CachePath,
        [PSCustomObject]$OwnerProfile
    )

    $systemPrompt = Get-ProviderSystemPrompt -Provider $Provider -OwnerProfile $OwnerProfile

    $aiParams = @{
        SystemPrompt = $systemPrompt
        UserPrompt   = $UserPrompt
        JsonMode     = $true
        MaxTokens    = 8192
        Temperature  = 0.2
        Provider     = $Provider
    }
    if ($Model) { $aiParams.Model = $Model }

    if ($CachePath) {
        # One cache subdirectory per provider; the key is the email with every
        # character outside [\w\-\.] replaced by an underscore.
        $providerCachePath = Join-Path $CachePath $Provider
        $cacheKey = ($Contact.email -replace '[^\w\-\.]', '_')
        $aiParams.CachePath = $providerCachePath
        $aiParams.CacheKey = $cacheKey
        $response = Invoke-PCCompletionCached @aiParams
    }
    else {
        $response = Invoke-PCCompletion @aiParams
    }
    if (-not $response) { throw 'Empty response from AI' }

    # Sanitize and parse (using shared repair function)
    return (Repair-JsonResponse -Response $response)
}

function Merge-ProviderResearch {
    <#
    .SYNOPSIS
        Merges research results from multiple providers using union + conflict flagging.

    .DESCRIPTION
        For each scalar field (current_title, current_organisation, still_in_role,
        linkedin_active) the non-"Unknown" values reported by the providers are
        compared:
          - no provider has a value      -> 'Unknown'
          - one value / all agree        -> that value, no conflict
          - providers disagree           -> the value reported by the most
                                            providers wins (ties broken by value,
                                            ascending, so the choice is repeatable)
                                            and the rest are listed as alternatives
        telematics_activity is merged as a union (found / since_email_date are true
        if any provider says so; summaries are concatenated with a provider prefix).
        source_urls are unioned and de-duplicated.

        Confidence starts at the highest recognised level any provider reported
        (High / Medium / Low; missing or unrecognised values count as Low). It is
        raised to at least Medium when two or more providers give the same
        organisation, and to High when two or more providers also give the same title.

    .PARAMETER ProviderResults
        Hashtable mapping provider name -> parsed JSON object from that provider.

    .OUTPUTS
        Ordered dictionary with the merged fields, providers_used,
        provider_agreement, conflicts and a researched_at timestamp.
    #>

    param(
        [Parameter(Mandatory)]
        [hashtable]$ProviderResults  # provider_name → parsed JSON object
    )

    # Hashtable key order is arbitrary; sort so every derived list (sources,
    # summaries, providers_used) comes out in the same order on every run.
    $providers = @($ProviderResults.Keys | Sort-Object)

    # Helper: pick best non-Unknown value for a string field.
    # Reads $providers / $ProviderResults from the enclosing function scope.
    function Select-BestValue {
        param([string]$FieldName, [string]$Default = 'Unknown')

        $values = [ordered]@{}
        foreach ($p in $providers) {
            $val = $ProviderResults[$p].$FieldName
            if ($val -and $val -ne 'Unknown' -and $val -ne '') {
                $values[$p] = $val
            }
        }

        if ($values.Count -eq 0) { return @{ value = $Default; sources = @(); conflict = $false } }
        if ($values.Count -eq 1) {
            $src = @($values.Keys)[0]
            return @{ value = $values[$src]; sources = @($src); conflict = $false }
        }

        # Multiple providers have values — check for agreement
        $uniqueValues = @($values.Values | Sort-Object -Unique)
        if ($uniqueValues.Count -eq 1) {
            # All agree
            return @{ value = $uniqueValues[0]; sources = @($values.Keys); conflict = $false }
        }

        # Conflict — most corroborated value wins. The secondary sort on Name
        # makes the winner deterministic when several values have equal support.
        $grouped = @(
            $values.GetEnumerator() |
                Group-Object -Property Value |
                Sort-Object -Property @{ Expression = 'Count'; Descending = $true }, @{ Expression = 'Name'; Ascending = $true }
        )
        $best = $grouped[0]
        return @{
            value        = $best.Name
            sources      = @($best.Group | ForEach-Object { $_.Key })
            conflict     = $true
            alternatives = @($grouped | Where-Object { $_.Name -ne $best.Name } | ForEach-Object {
                    [ordered]@{ value = $_.Name; providers = @($_.Group | ForEach-Object { $_.Key }) }
                })
        }
    }

    # Merge scalar fields
    $title = Select-BestValue -FieldName 'current_title'
    $org = Select-BestValue -FieldName 'current_organisation'
    $role = Select-BestValue -FieldName 'still_in_role'
    $linkedin = Select-BestValue -FieldName 'linkedin_active'

    # Merge telematics_activity — union approach
    $telematicsFound = $false
    $telematicsSummaries = @()
    $telematicsSinceEmail = $false
    foreach ($p in $providers) {
        $ta = $ProviderResults[$p].telematics_activity
        if ($ta) {
            if ($ta.found -eq $true) {
                $telematicsFound = $true
                # since_email_date only counts when the same provider also reported found = true
                if ($ta.since_email_date -eq $true) { $telematicsSinceEmail = $true }
            }
            if ($ta.summary -and $ta.summary -ne '' -and $ta.summary -ne 'Unknown') {
                $telematicsSummaries += "$($p): $($ta.summary)"
            }
        }
    }
    $telematicsSummary = if ($telematicsSummaries.Count -gt 0) { $telematicsSummaries -join '; ' } else { '' }

    # Merge source_urls — union all
    $allUrls = @()
    foreach ($p in $providers) {
        $urls = $ProviderResults[$p].source_urls
        if ($urls) { $allUrls += @($urls) }
    }
    $allUrls = @($allUrls | Where-Object { $_ } | Sort-Object -Unique)

    # Determine merged confidence. Work with numeric ranks so that missing,
    # null or unrecognised provider values are ignored explicitly and the
    # emitted label always uses the canonical spelling.
    $confidenceLevels = @{ 'High' = 3; 'Medium' = 2; 'Low' = 1 }
    $confidenceNames = @('Low', 'Medium', 'High')
    $rank = 1
    foreach ($p in $providers) {
        $conf = "$($ProviderResults[$p].confidence)".Trim()
        if ($conf -and $confidenceLevels.ContainsKey($conf) -and $confidenceLevels[$conf] -gt $rank) {
            $rank = $confidenceLevels[$conf]
        }
    }

    # Upgrade confidence if multiple providers corroborate key fields.
    # Without a conflict, .sources lists every provider that supplied a value.
    $orgCorroborated = (@($org.sources).Count -ge 2) -and (-not $org.conflict)
    $titleCorroborated = (@($title.sources).Count -ge 2) -and (-not $title.conflict)
    if ($orgCorroborated -and $rank -lt 2) { $rank = 2 }
    if ($orgCorroborated -and $titleCorroborated) { $rank = 3 }
    $maxConfidence = $confidenceNames[$rank - 1]

    # Build conflicts array
    $conflicts = @()
    if ($title.conflict) { $conflicts += [ordered]@{ field = 'current_title'; chosen = $title.value; alternatives = $title.alternatives } }
    if ($org.conflict) { $conflicts += [ordered]@{ field = 'current_organisation'; chosen = $org.value; alternatives = $org.alternatives } }
    if ($role.conflict) { $conflicts += [ordered]@{ field = 'still_in_role'; chosen = $role.value; alternatives = $role.alternatives } }

    return [ordered]@{
        current_title        = $title.value
        current_organisation = $org.value
        still_in_role        = $role.value
        telematics_activity  = [ordered]@{
            found            = $telematicsFound
            summary          = $telematicsSummary
            since_email_date = $telematicsSinceEmail
        }
        linkedin_active      = $linkedin.value
        confidence           = $maxConfidence
        source_urls          = $allUrls
        research_method      = 'multi-provider'
        providers_used       = $providers
        provider_agreement   = [ordered]@{
            title_sources = $title.sources
            org_sources   = $org.sources
        }
        conflicts            = $conflicts
        researched_at        = (Get-Date).ToString('o')
    }
}

function Invoke-ResearchContacts {
    <#
    .SYNOPSIS
        Multi-provider AI contact research with response merging.

    .DESCRIPTION
        Reads email-analyses.json, deduplicates external contacts (by lower-cased
        email; the first occurrence wins), and for each unique contact calls every
        provider in -Providers via Invoke-SingleProviderResearch to research their
        current role, organisation, and telematics activity.

        Each provider's raw response is cached separately under:
            .cache/responses/research/{provider}/

        Results from all providers are merged by Merge-ProviderResearch using
        union + conflict flagging:
        - Most specific non-"Unknown" value wins per field
        - Multi-provider agreement upgrades confidence
        - Conflicts are flagged (not silently resolved)

        Merged results are written to <OutputPath>/<OutputFileName>. Any contact-level
        errors are appended to <OutputPath>/errors.json, which is always written as
        a JSON array.

    .PARAMETER AnalysesPath
        Path to email-analyses.json from the analyse stage. Must exist.

    .PARAMETER OutputPath
        Path to data/ directory where contact-research.json will be written.
        Created if it does not exist.

    .PARAMETER Model
        Override model (applies to all providers). Leave empty for provider defaults.

    .PARAMETER Providers
        Array of providers to query. Default: all three.

    .PARAMETER CachePath
        Directory for response caching. Each provider gets a subdirectory.

    .PARAMETER EnrichmentPath
        Accepted for interface compatibility; not read by this function.

    .PARAMETER OutputFileName
        File name of the merged result inside -OutputPath. Default: contact-research.json.

    .PARAMETER OwnerProfile
        Optional owner profile forwarded to each provider call.

    .PARAMETER DelayMs
        Delay between API calls in milliseconds. Default: 800.

    .PARAMETER MaxErrors
        Circuit breaker: stop after this many consecutive contacts for which no
        provider result could be obtained or merged. Default: 5.

    .OUTPUTS
        PSCustomObject with total_contacts, total_researched, total_errors, output_path.
    #>

    [CmdletBinding()]
    param(
        [Parameter(Mandatory)]
        [string]$AnalysesPath,

        [Parameter(Mandatory)]
        [string]$OutputPath,

        [string]$Model,

        [string[]]$Providers = @('anthropic', 'openai', 'gemini'),

        [string]$CachePath,

        [string]$EnrichmentPath,

        [string]$OutputFileName = 'contact-research.json',

        [PSCustomObject]$OwnerProfile,

        [int]$DelayMs = 800,

        [int]$MaxErrors = 5
    )

    # Builds one entry of the "contacts" output array so that the success,
    # failure and skipped paths all emit exactly the same shape.
    function New-ContactRecord {
        param($Source, $Research = $null, $ErrorMessage = $null)
        return [ordered]@{
            name                  = $Source.name
            email                 = $Source.email
            original_role         = $Source.original_role
            original_organisation = $Source.original_organisation
            email_date            = $Source.email_date
            research              = $Research
            research_error        = $ErrorMessage
        }
    }

    # Load analyses — fail loudly instead of silently producing an empty report.
    if (-not (Test-Path $AnalysesPath -PathType Leaf)) {
        throw "Analyses file not found: $AnalysesPath"
    }
    $analyses = Get-Content $AnalysesPath -Raw | ConvertFrom-Json

    # Deduplicate external contacts across all emails
    $contactMap = [ordered]@{}
    foreach ($analysis in $analyses.analyses) {
        if ($analysis.analysis_error) { continue }

        foreach ($participant in $analysis.participants) {
            if ($participant.type -ne 'external') { continue }
            if (-not $participant.email -or $participant.email -eq 'Unknown') { continue }

            $key = $participant.email.ToLowerInvariant()
            if (-not $contactMap.Contains($key)) {
                $contactMap[$key] = [ordered]@{
                    name                  = $participant.name
                    email                 = $participant.email
                    original_role         = $participant.role
                    original_organisation = $participant.organisation
                    email_date            = $analysis.date
                    source_file           = $analysis.file_path
                }
            }
        }
    }

    $contacts = @($contactMap.Values)
    Write-Host " Found $($contacts.Count) unique external contacts" -ForegroundColor Gray
    Write-Host " Providers: $($Providers -join ', ')" -ForegroundColor Gray

    $results = @()
    $errors = @()
    $contactCount = 0
    $consecutiveErrors = 0

    foreach ($contact in $contacts) {
        $contactCount++

        # Circuit breaker: mark the current and all remaining contacts as skipped.
        if ($consecutiveErrors -ge $MaxErrors) {
            Write-Host " Circuit breaker: $MaxErrors consecutive errors, stopping research" -ForegroundColor Red
            # $contactCount was already incremented, so $contactCount - 1 is the current index.
            for ($i = $contactCount - 1; $i -lt $contacts.Count; $i++) {
                $results += (New-ContactRecord -Source $contacts[$i] -ErrorMessage "Skipped (circuit breaker)")
                $errors += [ordered]@{ contact = $contacts[$i].email; error = "Circuit breaker" }
            }
            break
        }

        $userPrompt = @"
Research this business contact's current status:

## Contact Details
- Name: $($contact.name)
- Email: $($contact.email)
- Role at time of contact: $($contact.original_role)
- Organisation at time of contact: $($contact.original_organisation)
- Date of email exchange: $($contact.email_date)

## Research Tasks
1. Find their CURRENT job title and employer
2. Determine if they are still at $($contact.original_organisation)
3. Look for any involvement in telematics, connected car, UBI, fleet management, or mobility SINCE $($contact.email_date)
4. Check LinkedIn activity status
5. Assess confidence level of findings

## Required Output (JSON)
{
  "current_title": "Current Job Title or Unknown",
  "current_organisation": "Current Employer or Unknown",
  "still_in_role": "Yes|No|Unknown",
  "telematics_activity": {
    "found": true/false,
    "summary": "Brief description",
    "since_email_date": true/false
  },
  "linkedin_active": "Active|Inactive|Unknown|No Profile Found",
  "confidence": "High|Medium|Low",
  "source_urls": ["url1", "url2"]
}
"@

        Write-Host " [$contactCount/$($contacts.Count)] $($contact.name) ($($contact.email))" -NoNewline -ForegroundColor Gray

        # Query each provider; a failure of one provider does not stop the others.
        $providerResults = @{}
        $providerErrors = @()

        foreach ($provider in $Providers) {
            try {
                $parsed = Invoke-SingleProviderResearch `
                    -Contact $contact `
                    -Provider $provider `
                    -UserPrompt $userPrompt `
                    -Model:$Model `
                    -CachePath:$CachePath `
                    -OwnerProfile:$OwnerProfile

                $providerResults[$provider] = $parsed
                Write-Host " $($provider[0].ToString().ToUpper())✓" -NoNewline -ForegroundColor Green
            }
            catch {
                $providerErrors += [ordered]@{ provider = $provider; error = $_.Exception.Message }
                Write-Host " $($provider[0].ToString().ToUpper())✗" -NoNewline -ForegroundColor Red
            }

            # Rate limiting between provider calls (never less than 200 ms)
            Start-Sleep -Milliseconds ([math]::Max(200, $DelayMs / $Providers.Count))
        }

        # Merge or fail
        if ($providerResults.Count -gt 0) {
            try {
                $merged = Merge-ProviderResearch -ProviderResults $providerResults
                $merged.provider_errors = $providerErrors

                $results += (New-ContactRecord -Source $contact -Research $merged)
                $consecutiveErrors = 0
                Write-Host "" -ForegroundColor Green  # newline
            }
            catch {
                $consecutiveErrors++
                $mergeMessage = "Merge failed: $($_.Exception.Message)"
                $results += (New-ContactRecord -Source $contact -ErrorMessage $mergeMessage)
                $errors += [ordered]@{ contact = $contact.email; error = $mergeMessage }
                Write-Host " MERGE✗" -ForegroundColor Red
            }
        }
        else {
            $consecutiveErrors++
            $allErrors = ($providerErrors | ForEach-Object { "$($_.provider): $($_.error)" }) -join '; '
            $results += (New-ContactRecord -Source $contact -ErrorMessage "All providers failed: $allErrors")
            $errors += [ordered]@{ contact = $contact.email; error = "All providers failed" }
            Write-Host " ALL✗" -ForegroundColor Red
        }

        # Rate limiting between contacts
        if ($contactCount -lt $contacts.Count) {
            Start-Sleep -Milliseconds $DelayMs
        }
    }

    # @() is required: with exactly one match the pipeline yields a single
    # dictionary, whose own .Count is its number of keys rather than 1.
    $researchedCount = @($results | Where-Object { $null -eq $_.research_error }).Count

    # Write results
    $output = [ordered]@{
        metadata = [ordered]@{
            generated_at     = (Get-Date).ToString('o')
            providers        = $Providers
            total_contacts   = $contacts.Count
            total_researched = $researchedCount
            total_errors     = $errors.Count
        }
        contacts = $results
    }

    if (-not (Test-Path $OutputPath)) {
        New-Item -ItemType Directory -Path $OutputPath -Force | Out-Null
    }

    $outputFile = Join-Path $OutputPath $OutputFileName
    $output | ConvertTo-Json -Depth 10 | Set-Content -Path $outputFile -Encoding UTF8

    if ($errors.Count -gt 0) {
        $errorsFile = Join-Path $OutputPath 'errors.json'
        $all = @($errors)
        if (Test-Path $errorsFile) {
            # Tolerate an empty errors.json left behind by an earlier run.
            $existingRaw = Get-Content $errorsFile -Raw
            if ($existingRaw -and $existingRaw.Trim()) {
                $existing = $existingRaw | ConvertFrom-Json
                $all = @($existing) + $errors
            }
        }
        # -InputObject keeps a single entry serialised as a one-element array,
        # so the file has the same shape regardless of how many errors occurred.
        ConvertTo-Json -InputObject @($all) -Depth 5 | Set-Content -Path $errorsFile -Encoding UTF8
    }

    [PSCustomObject]@{
        total_contacts   = $contacts.Count
        total_researched = $output.metadata.total_researched
        total_errors     = $errors.Count
        output_path      = $outputFile
    }
}