tool/Analysis/functions/Invoke-DeepResearch.ps1

function Invoke-DeepResearch {
    <#
    .SYNOPSIS
        Deep web-search research on triage-selected contacts, merged with Pass 1 results.

    .DESCRIPTION
        Reads contact-triage.json to identify contacts selected for deep research.
        For each selected contact that also exists in the Pass 1 file, a prompt is
        built (contact details, Pass 1 findings, optional website snippets from the
        enrichment file) and sent through Invoke-PCCompletion / Invoke-PCCompletionCached
        with the 'openai' provider, JSON mode and WebSearch enabled.

        Deep research results are merged with Pass 1 results via Merge-DeepResearch:
        a non-"Unknown" Pass 2 value wins over the Pass 1 value; otherwise the
        Pass 1 value is kept.

        Contacts that were not selected, could not be researched, or have no email
        address are carried over unchanged as Pass 1-only records.

        Final merged results are written to contact-research.json.

    .PARAMETER TriagePath
        Path to contact-triage.json from the triage stage. Must exist.

    .PARAMETER Pass1Path
        Path to contact-research-pass1.json for merging. Must exist.

    .PARAMETER EnrichmentPath
        Path to contact-enrichment.json for additional context. Optional; silently
        ignored when empty or missing.

    .PARAMETER OutputPath
        Path to data/ directory where contact-research.json will be written.
        Created if it does not exist.

    .PARAMETER Model
        Model to use for deep research. Default: gpt-5.4.

    .PARAMETER CachePath
        Directory for response caching. When set, the cached completion function is
        used with a cache key derived from the contact's email address.

    .PARAMETER DelayMs
        Delay between API calls in milliseconds. Default: 2000.

    .PARAMETER MaxErrors
        Circuit breaker: stop after this many consecutive errors. Default: 3.

    .PARAMETER TimeoutSeconds
        Timeout for each deep research call. Default: 300 (5 min).

    .OUTPUTS
        PSCustomObject with total_contacts, deep_researched, pass1_only,
        total_errors (all failed deep-research calls, not just the trailing
        consecutive run) and output_path.
    #>

    [CmdletBinding()]
    param(
        [Parameter(Mandatory)]
        [string]$TriagePath,

        [Parameter(Mandatory)]
        [string]$Pass1Path,

        [string]$EnrichmentPath,

        [Parameter(Mandatory)]
        [string]$OutputPath,

        [string]$Model = 'gpt-5.4',

        [string]$CachePath,

        [int]$DelayMs = 2000,

        [int]$MaxErrors = 3,

        [int]$TimeoutSeconds = 300
    )

    # Load inputs. Both files are mandatory: fail fast instead of continuing with
    # $null data and overwriting contact-research.json with an empty result.
    $triage = Get-Content -Path $TriagePath -Raw -ErrorAction Stop | ConvertFrom-Json
    $pass1 = Get-Content -Path $Pass1Path -Raw -ErrorAction Stop | ConvertFrom-Json

    # Normalise to an array so .Count and foreach behave the same for 0, 1 or many contacts.
    $allContacts = @($pass1.contacts | Where-Object { $_ })

    # Build lookup of pass1 results by (lower-cased) email
    $pass1Lookup = @{}
    foreach ($contact in $allContacts) {
        if ($contact.email) {
            $pass1Lookup[$contact.email.ToLowerInvariant()] = $contact
        }
    }

    # Load enrichment for context
    $enrichment = $null
    if ($EnrichmentPath -and (Test-Path $EnrichmentPath)) {
        $enrichment = Get-Content $EnrichmentPath -Raw | ConvertFrom-Json
    }

    # Get selected contacts in the order the triage file lists them. Entries
    # without an email are dropped and duplicates collapsed so no contact is
    # researched (and billed) twice.
    $selectedEmails = @(
        $triage.selected |
            Where-Object { $_.selected -and $_.email } |
            ForEach-Object { $_.email.ToLowerInvariant() } |
            Select-Object -Unique
    )
    Write-Host " $($selectedEmails.Count) contacts selected for deep research" -ForegroundColor Gray

    $systemPrompt = @"
You are a professional research analyst performing deep web research on business contacts.
You have access to web search — USE IT to find current, verified information.

CRITICAL RULES:
- Return ONLY a valid JSON object — no markdown fences, no preamble
- Search for the person by name + company + industry context
- Verify information from multiple sources when possible
- Include URLs of sources you found
- If you find the person on LinkedIn, note their current title and company
- Look for recent news, conference appearances, company announcements
- Check if their company is active in telematics, connected cars, fleet management, IoT, or mobility

CONFIDENCE GUIDE:
- High: Found on LinkedIn + company website, or multiple independent sources
- Medium: Found on one source (LinkedIn OR company page OR news article)
- Low: Inferred from indirect sources or partial matches only
"@


    $deepResults = @{}
    $deepCount = 0
    $consecutiveErrors = 0   # drives the circuit breaker; reset on every success
    $totalErrors = 0         # reported in the summary; never reset

    foreach ($email in $selectedEmails) {
        $deepCount++

        if ($consecutiveErrors -ge $MaxErrors) {
            Write-Host " Circuit breaker: $MaxErrors consecutive errors, stopping deep research" -ForegroundColor Red
            break
        }

        $pass1Contact = $pass1Lookup[$email]
        if (-not $pass1Contact) {
            Write-Host " [$deepCount/$($selectedEmails.Count)] $email — not found in Pass 1, skipping" -ForegroundColor Yellow
            continue
        }

        # Build enrichment context: up to 200 chars from each successfully fetched
        # page of the contact's domain (free-mail providers are skipped).
        $enrichmentContext = ""
        if ($enrichment) {
            $contactEnrich = $null
            if ($enrichment.contacts.PSObject.Properties.Name -contains $email) {
                $contactEnrich = $enrichment.contacts.$email
            }
            if ($contactEnrich -and -not $contactEnrich.is_free_provider) {
                $domain = $contactEnrich.domain
                if ($enrichment.domains.PSObject.Properties.Name -contains $domain) {
                    $domainInfo = $enrichment.domains.$domain
                    if ($domainInfo.pages) {
                        $successPages = @($domainInfo.pages.PSObject.Properties | Where-Object { $_.Value.success -eq $true })
                        if ($successPages.Count -gt 0) {
                            $snippets = ($successPages | ForEach-Object {
                                    $content = $_.Value.content
                                    if ($content -and $content.Length -gt 200) { $content = $content.Substring(0, 200) + '...' }
                                    "$($_.Name): $content"
                                }) -join "`n"
                            $enrichmentContext = @"

## Company Website Content (from $domain)
$snippets
"@

                        }
                    }
                }
            }
        }

        # Pass 1 context
        $pass1Context = ""
        if ($pass1Contact.research) {
            $r = $pass1Contact.research
            $pass1Context = @"

## Pass 1 Research (to verify or improve)
- Current title: $($r.current_title)
- Current organisation: $($r.current_organisation)
- Still in role: $($r.still_in_role)
- Telematics activity: found=$($r.telematics_activity.found), summary="$($r.telematics_activity.summary)"
- LinkedIn: $($r.linkedin_active)
- Pass 1 confidence: $($r.confidence)
"@

        }

        $userPrompt = @"
Deep research this business contact using web search:

## Contact
- Name: $($pass1Contact.name)
- Email: $($pass1Contact.email)
- Role at time of contact: $($pass1Contact.original_role)
- Organisation at time of contact: $($pass1Contact.original_organisation)
- Date of email exchange: $($pass1Contact.email_date)
$pass1Context
$enrichmentContext

## Research Tasks
1. Search the web for "$($pass1Contact.name)" + "$($pass1Contact.original_organisation)"
2. Find their CURRENT job title and employer (check LinkedIn, company websites, news)
3. Determine if they are still at $($pass1Contact.original_organisation)
4. Look for involvement in telematics, connected car, UBI, fleet management, IoT, or mobility
5. Check LinkedIn profile activity
6. Provide source URLs for your findings

## Required Output (JSON)
{
  "current_title": "Current Job Title or Unknown",
  "current_organisation": "Current Employer or Unknown",
  "still_in_role": "Yes|No|Unknown",
  "telematics_activity": {
    "found": true/false,
    "summary": "Brief description of any telematics/IoT activity found",
    "since_email_date": true/false
  },
  "linkedin_active": "Active|Inactive|Unknown|No Profile Found",
  "confidence": "High|Medium|Low",
  "source_urls": ["url1", "url2"]
}
"@


        Write-Host " [$deepCount/$($selectedEmails.Count)] $($pass1Contact.name) ($email)" -NoNewline -ForegroundColor White

        try {
            $aiParams = @{
                SystemPrompt = $systemPrompt
                UserPrompt   = $userPrompt
                JsonMode     = $true
                MaxTokens    = 8192
                Temperature  = 0.3
                Provider     = 'openai'
                Model        = $Model
                WebSearch    = $true
                TimeoutSec   = $TimeoutSeconds
            }

            if ($CachePath) {
                # Email addresses contain characters that are unsafe in file names.
                $cacheKey = ($email -replace '[^\w\-\.]', '_')
                $aiParams.CachePath = $CachePath
                $aiParams.CacheKey = $cacheKey
                $response = Invoke-PCCompletionCached @aiParams
            }
            else {
                $response = Invoke-PCCompletion @aiParams
            }
            if (-not $response) { throw 'Empty response from AI' }

            # Sanitize and parse (using shared repair function)
            $parsed = Repair-JsonResponse -Response $response
            # A response that could not be parsed is a failure, not a success with
            # blank confidence; count it so the circuit breaker can react.
            if (-not $parsed) { throw 'AI response could not be parsed as JSON' }

            $deepResults[$email] = $parsed
            $consecutiveErrors = 0
            Write-Host " ✓ ($($parsed.confidence))" -ForegroundColor Green
        }
        catch {
            $consecutiveErrors++
            $totalErrors++
            Write-Host " ✗ $($_.Exception.Message)" -ForegroundColor Red
        }

        if ($deepCount -lt $selectedEmails.Count -and $DelayMs -gt 0) {
            Start-Sleep -Milliseconds $DelayMs
        }
    }

    # --- Merge Pass 1 + Pass 2 into final research ---
    Write-Host " Merging Pass 1 + Pass 2 results..." -ForegroundColor Gray

    $finalContacts = New-Object 'System.Collections.Generic.List[object]'
    $deepResearchedCount = 0

    foreach ($contact in $allContacts) {
        # Contacts without an email can never have a deep result. Looking them up
        # explicitly avoids a failed method call leaving a stale key from the
        # previous iteration, which would attach another contact's research.
        $deep = $null
        if ($contact.email) {
            $deep = $deepResults[$contact.email.ToLowerInvariant()]
        }

        # Defaults describe a Pass 1-only record; overwritten below when deep data exists.
        $research = $contact.research
        $researchSource = 'pass1'
        $researchError = $contact.research_error

        if ($deep) {
            $deepResearchedCount++
            # Merge: Pass 2 overrides "Unknown" fields from Pass 1
            $research = Merge-DeepResearch -Pass1 $contact.research -Pass2 $deep
            # Record the model that was actually used for this run.
            if ($research -is [System.Collections.IDictionary]) {
                $research['deep_research_model'] = $Model
            }
            $researchSource = 'pass1+pass2'
            $researchError = $null
        }

        $finalContacts.Add([ordered]@{
                name                  = $contact.name
                email                 = $contact.email
                original_role         = $contact.original_role
                original_organisation = $contact.original_organisation
                email_date            = $contact.email_date
                research              = $research
                research_source       = $researchSource
                research_error        = $researchError
            })
    }

    $totalContacts = $allContacts.Count
    $pass1OnlyCount = $totalContacts - $deepResearchedCount

    # Write final output
    $output = [ordered]@{
        metadata = [ordered]@{
            generated_at    = (Get-Date).ToString('o')
            total_contacts  = $totalContacts
            deep_researched = $deepResearchedCount
            pass1_only      = $pass1OnlyCount
            total_errors    = $totalErrors
            model           = $Model
        }
        contacts = $finalContacts.ToArray()
    }

    if (-not (Test-Path -Path $OutputPath -PathType Container)) {
        $null = New-Item -ItemType Directory -Path $OutputPath -Force
    }
    $outputFile = Join-Path $OutputPath 'contact-research.json'
    $output | ConvertTo-Json -Depth 10 | Set-Content -Path $outputFile -Encoding UTF8

    Write-Host " Final research: $deepResearchedCount deep + $pass1OnlyCount pass1-only" -ForegroundColor Green

    [PSCustomObject]@{
        total_contacts  = $totalContacts
        deep_researched = $deepResearchedCount
        pass1_only      = $pass1OnlyCount
        total_errors    = $totalErrors
        output_path     = $outputFile
    }
}

function Merge-DeepResearch {
    <#
    .SYNOPSIS
        Merge Pass 2 deep research into Pass 1 results.

    .DESCRIPTION
        Text fields (current_title, current_organisation, still_in_role,
        linkedin_active): the Pass 2 value wins when it is non-empty and not
        "Unknown"; otherwise the Pass 1 value is used; otherwise "Unknown".

        Telematics flags are OR-ed across both passes; the summary prefers Pass 2.
        Source URLs from both passes are combined, de-duplicated and sorted.
        Confidence is the higher of the two passes (ties go to Pass 2) and falls
        back to "Low" when neither pass supplies a recognised level.

    .PARAMETER Pass1
        Pass 1 research object. May be $null, in which case it is treated as empty.

    .PARAMETER Pass2
        Pass 2 (deep research) object as parsed from the model's JSON response.

    .PARAMETER Model
        Model name recorded in deep_research_model. Default: gpt-5.5-pro.

    .OUTPUTS
        Ordered dictionary with the merged research record.
    #>

    param(
        [object]$Pass1,
        [object]$Pass2,
        [string]$Model = 'gpt-5.5-pro'
    )

    if (-not $Pass1) { $Pass1 = [PSCustomObject]@{} }

    # Helper: pick best value (Pass 2 wins if non-Unknown). Values are trimmed so
    # that padded answers such as " Unknown " are still recognised.
    function Select-KnownValue {
        param([string]$P1Val, [string]$P2Val)
        foreach ($candidate in @($P2Val, $P1Val)) {
            $text = "$candidate".Trim()
            if ($text -and $text -ne 'Unknown') { return $text }
        }
        return 'Unknown'
    }

    # Helper: convert a JSON flag to a boolean. A plain [bool] cast turns every
    # non-empty string - including "false" - into $true, so negative strings
    # are recognised explicitly before falling back to the cast.
    function ConvertTo-ResearchFlag {
        param([object]$Value)
        if ($Value -is [string]) {
            return ($Value.Trim() -notmatch '^(?:false|no|0|unknown|none|null|n/a)?$')
        }
        return [bool]$Value
    }

    # Merge source URLs (@() wraps scalars and nulls; Where-Object drops the nulls)
    $allUrls = @(@($Pass1.source_urls) + @($Pass2.source_urls) | Where-Object { $_ } | Sort-Object -Unique)

    # Merge telematics
    $p1Tel = $Pass1.telematics_activity
    $p2Tel = $Pass2.telematics_activity
    $telFound = (ConvertTo-ResearchFlag $p2Tel.found) -or (ConvertTo-ResearchFlag $p1Tel.found)
    $telSince = (ConvertTo-ResearchFlag $p2Tel.since_email_date) -or (ConvertTo-ResearchFlag $p1Tel.since_email_date)
    $telSummary = ''
    foreach ($summary in @($p2Tel.summary, $p1Tel.summary)) {
        if ($summary) { $telSummary = $summary; break }
    }

    # Confidence: take the higher-ranked level; unrecognised values rank 0.
    $confLevels = @{ 'High' = 3; 'Medium' = 2; 'Low' = 1 }
    $p1ConfStr = "$($Pass1.confidence)".Trim()
    $p2ConfStr = "$($Pass2.confidence)".Trim()
    $p1Conf = if ($confLevels.ContainsKey($p1ConfStr)) { $confLevels[$p1ConfStr] } else { 0 }
    $p2Conf = if ($confLevels.ContainsKey($p2ConfStr)) { $confLevels[$p2ConfStr] } else { 0 }
    $finalConf = if ($p2Conf -ge $p1Conf) { $p2ConfStr } else { $p1ConfStr }
    # Neither pass gave a recognised level: do not leak arbitrary text downstream.
    if ($p1Conf -eq 0 -and $p2Conf -eq 0) { $finalConf = 'Low' }

    # Built with @() rather than an inline if/else expression: statement output
    # is unrolled, which would turn an empty list into $null and a one-element
    # list into a scalar, changing the JSON shape.
    $providers = @($Pass1.providers_used | Where-Object { $_ })

    return [ordered]@{
        current_title        = Select-KnownValue -P1Val "$($Pass1.current_title)" -P2Val "$($Pass2.current_title)"
        current_organisation = Select-KnownValue -P1Val "$($Pass1.current_organisation)" -P2Val "$($Pass2.current_organisation)"
        still_in_role        = Select-KnownValue -P1Val "$($Pass1.still_in_role)" -P2Val "$($Pass2.still_in_role)"
        telematics_activity  = [ordered]@{
            found            = $telFound
            summary          = $telSummary
            since_email_date = $telSince
        }
        linkedin_active      = Select-KnownValue -P1Val "$($Pass1.linkedin_active)" -P2Val "$($Pass2.linkedin_active)"
        confidence           = $finalConf
        source_urls          = $allUrls
        research_method      = 'two-pass'
        pass1_providers      = $providers
        deep_research_model  = $Model
        researched_at        = (Get-Date).ToString('o')
    }
}