## scour.psm1

## Module unload hook: dispose every cached index reader and index directory
## so that the handles held on the index files are released.
$MyInvocation.MyCommand.ScriptBlock.Module.OnRemove = {
    ## Dispose the reader behind each cached searcher
    foreach($cachedSearcher in $SCRIPT:searchers.Values)
    {
        $cachedSearcher.IndexReader.Dispose()
        $cachedSearcher = $null
    }

    ## Then dispose the directory objects the readers were opened from
    foreach($cachedDirectory in $SCRIPT:indexDirectories.Values)
    {
        $cachedDirectory.Dispose()
        $cachedDirectory = $null
    }

    ## Force a collection after the explicit disposal
    [GC]::Collect()
}

## Creates an index of the files in the current location, storing the index
## in the __scour subdirectory.
##
## The index writer is opened with its "create" argument set to $true. Each
## matching file becomes one document with two stored, analyzed fields:
##   path    - the file path relative to the current location
##   content - the raw text of the file
function Initialize-ScourIndex
{
    [CmdletBinding()]
    param(
        ## The pattern to use for file indexing. Defaults to *.txt + common source extensions
        [string[]] $Path = ("*.txt","*.ps1","*.psm1","*.cs","*.c","*.cpp","*.h","*.py","*.java")
    )

    ## Declared up front so that the cleanup in 'finally' can tell which
    ## resources were actually created if an early step fails.
    $indexDirectory = $null
    $indexWriter = $null

    try
    {
        ## Open the index from the "__scour" subdirectory of the current location
        $indexDirectory = [Lucene.Net.Store.FSDirectory]::Open("$pwd\__scour")
        $analyzer = New-Object Lucene.Net.Analysis.Standard.StandardAnalyzer "LUCENE_CURRENT"

        $unlimited = [Lucene.Net.Index.IndexWriter+MaxFieldLength]::UNLIMITED
        $indexWriter = New-Object Lucene.Net.Index.IndexWriter $indexDirectory,$analyzer,$true,$unlimited

        ## Enumerate the files once. Wrapping in @() guarantees an array, so that
        ## .Count is the number of files even when exactly one file matches
        ## (a lone FileInfo's .Length would be its size in bytes, not a count).
        Write-Progress -Activity "Estimating index size"
        $files = @(Get-ChildItem -File -Path $Path -Recurse)
        $fileCount = $files.Count

        ## Go through each of the files and index them
        $fileCounter = 0
        foreach($file in $files)
        {
            ## Only update progress every 1,000 files so that we don't hurt indexing performance
            if(($fileCounter % 1000) -eq 0)
            {
                Write-Progress -Activity "Processing $file ($fileCounter of $fileCount)" -PercentComplete ($fileCounter * 100 / $fileCount)
            }

            $content = Get-Content -LiteralPath $file.FullName -Raw

            ## NOTE(review): Get-Content -Raw may yield $null for an empty file on
            ## some PowerShell versions; normalise so the field always gets a string.
            if($null -eq $content)
            {
                $content = ""
            }

            ## -LiteralPath keeps names containing wildcard characters ([ ]) intact.
            ## Substring(2) strips the leading ".\" of the relative path.
            $indexPath = (Resolve-Path -LiteralPath $file.FullName -Relative).Substring(2)

            ## Create the Lucene document and add it to the index. Retain the path so that we can
            ## use it for quick searches later.
            $document = New-Object Lucene.Net.Documents.Document
            $document.Add( (New-Object Lucene.Net.Documents.Field "path", $indexPath, "YES","ANALYZED") )
            $document.Add( (New-Object Lucene.Net.Documents.Field "content", $content, "YES","ANALYZED") )
            $indexWriter.AddDocument($document)

            ## Kept from the original implementation: collect after every file
            ## (presumably to bound memory while large file contents are in flight).
            [GC]::Collect()

            $fileCounter++
        }

        ## Commit the index
        Write-Progress -Activity "Optimizing index"
        $indexWriter.Commit()
    }
    finally
    {
        ## Clean up only what was created, so a failure while opening the index
        ## is not masked by a null-method-call error here.
        if($null -ne $indexWriter)
        {
            $indexWriter.Dispose()
        }

        if($null -ne $indexDirectory)
        {
            $indexDirectory.Dispose()
        }

        [GC]::Collect()
    }
}

## Searches the Scour index that covers the current location.
##
## The index is looked up in the "__scour" subdirectory of the current location
## or of the nearest parent directory that has one. When the index lives in a
## parent directory, only results underneath the current location are returned.
##
## Output: file items (Get-Item) for each matching file, or Select-String
## matches when -RegularExpression is supplied.
##
## Raises a terminating "NoIndexForCurrentDirectory" error when no index exists
## for the current location or any of its parents.
function Search-ScourContent
{
    [CmdletBinding()]
    param(
        ## The query to use when searching
        [Parameter(Mandatory, Position = 0)]
        [String[]] $Query,

        ## The regular expression to apply to results, if any
        [Parameter()]
        [String] $RegularExpression,

        ## The file pattern to limit the search to, if any
        [Parameter()]
        $Path = "*"
    )

    ## Ensure they've created an index for the current location. Don't do this for them automatically,
    ## as it's likely to take a long time. Search parent directories if required. If the index is found
    ## in a parent directory, we will use the current subdirectory as a filter for results.
    ##
    ## Split-Path -Parent returns an empty string once the root has been passed, so the
    ## walk always terminates, whatever the drive's root looks like.
    $currentLocation = $pwd.Path
    $scourRoot = $currentLocation
    while($scourRoot -and -not (Test-Path -LiteralPath (Join-Path $scourRoot "__scour")))
    {
        $scourRoot = Split-Path -Path $scourRoot -Parent
    }

    ## If we couldn't find the index, throw an error.
    if(-not $scourRoot)
    {
        $PSCmdlet.ThrowTerminatingError(
            (New-Object System.Management.Automation.ErrorRecord `
                "Scour has not yet analyzed the current directory or any of its parents. To create a Scour index, run Initialize-ScourIndex.",
                "NoIndexForCurrentDirectory",
                "OpenError",
                $pwd))
    }

    ## Retain the searchers and index directories in the module scope so that we don't
    ## have to re-open the indexes for every search.
    if(-not $SCRIPT:searchers)
    {
        $SCRIPT:searchers = @{}
        $SCRIPT:indexDirectories = @{}
    }

    ## If we haven't created the searcher for this location yet, create it now.
    if(-not $SCRIPT:searchers.ContainsKey($scourRoot))
    {
        Write-Verbose "Getting new searcher"
        $indexDirectory = [Lucene.Net.Store.FSDirectory]::Open((Join-Path $scourRoot "__scour"))
        $SCRIPT:searchers[$scourRoot] = New-Object Lucene.Net.Search.IndexSearcher ([Lucene.Net.Index.IndexReader]::Open($indexDirectory, $true))
        $SCRIPT:indexDirectories[$scourRoot] = $indexDirectory
    }

    ## Parse the user's query
    $searcher = $SCRIPT:searchers[$scourRoot]
    $analyzer = New-Object Lucene.Net.Analysis.Standard.StandardAnalyzer "LUCENE_CURRENT"
    $parser = New-Object Lucene.Net.QueryParsers.QueryParser "LUCENE_CURRENT","content",$analyzer
    $queryObject = $parser.Parse($Query)

    ## Collect the search results. Ask for at least one hit so that an empty
    ## index (MaxDoc of 0) does not produce a zero-sized collector.
    $maxHits = [Math]::Max(1, $searcher.MaxDoc)
    $collector = [Lucene.Net.Search.TopScoreDocCollector]::Create($maxHits, $true)
    $searcher.Search($queryObject, $collector)

    ## Results must live underneath the current location. Compare against the location
    ## plus a trailing separator so that a sibling directory sharing the same name
    ## prefix ("foo" vs "foobar") is not treated as a child.
    $locationPrefix = $currentLocation.TrimEnd('\', '/') + [System.IO.Path]::DirectorySeparatorChar

    ## Go through the search results
    $collector.TopDocs().ScoreDocs | Foreach-Object Doc | Get-Unique | Foreach-Object {
        $indexPath = $searcher.Doc($_).Get("path")
        $indexPath = Join-Path $scourRoot $indexPath

        $isUnderCurrentLocation = $indexPath.StartsWith($locationPrefix, [System.StringComparison]::OrdinalIgnoreCase)
        if($isUnderCurrentLocation -and ($indexPath -like $Path))
        {
            if(-not $RegularExpression)
            {
                Get-Item -LiteralPath $indexPath
            }
            else
            {
                Select-String -LiteralPath $indexPath -Pattern $RegularExpression
            }
        }
    }
}