scour.psm1
## On module removal, release the file locks on the index files by disposing
## every cached index reader and index directory held in module scope.
$MyInvocation.MyCommand.ScriptBlock.Module.OnRemove = {
    foreach($searcher in $SCRIPT:searchers.Values)
    {
        $searcher.IndexReader.Dispose()
        $searcher = $null
    }

    foreach($indexDirectory in $SCRIPT:indexDirectories.Values)
    {
        $indexDirectory.Dispose()
        $indexDirectory = $null
    }

    [GC]::Collect()
}

## Creates an index of the files in the current location, storing the index
## in the __scour subdirectory.
function Initialize-ScourIndex
{
    [CmdletBinding()]
    param(
        ## The pattern to use for file indexing. Defaults to *.txt + common source extensions
        [string[]] $Path = ("*.txt","*.ps1","*.psm1","*.cs","*.c","*.cpp","*.h","*.py","*.java")
    )

    ## Declared up front so that the cleanup in 'finally' can tell whether
    ## each resource was actually created.
    $indexDirectory = $null
    $indexWriter = $null

    try
    {
        ## Open the index from the "__scour" subdirectory of the current location
        $indexDirectory = [Lucene.Net.Store.FSDirectory]::Open("$pwd\__scour")
        $analyzer = New-Object Lucene.Net.Analysis.Standard.StandardAnalyzer "LUCENE_CURRENT"
        $unlimited = [Lucene.Net.Index.IndexWriter+MaxFieldLength]::UNLIMITED
        $indexWriter = New-Object Lucene.Net.Index.IndexWriter $indexDirectory,$analyzer,$true,$unlimited

        ## Enumerate the files once. The list is used both for the progress total and
        ## for indexing. Wrapping in @() guarantees an array, so .Count is the number
        ## of files even when only one file matches (a lone FileInfo's .Length is its
        ## size in bytes, not a count).
        Write-Progress -Activity "Estimating index size"
        $files = @(Get-ChildItem -File -Path $Path -Recurse)
        $fileCount = $files.Count

        ## Go through each of the files and index them
        $fileCounter = 0
        foreach($file in $files)
        {
            ## Only update progress every 1,000 files so that we don't hurt indexing performance
            if(($fileCounter % 1000) -eq 0)
            {
                Write-Progress -Activity "Processing $file ($fileCounter of $fileCount)" -PercentComplete ($fileCounter * 100 / $fileCount)
            }

            ## Get-Content -Raw produces nothing for an empty file; index it as empty text.
            $content = Get-Content -LiteralPath $file.FullName -Raw
            if($null -eq $content)
            {
                $content = ""
            }

            ## Path relative to the current location, without the leading ".\".
            ## -LiteralPath keeps characters such as [ and ] in file names from being
            ## interpreted as wildcards.
            $indexPath = (Resolve-Path -LiteralPath $file.FullName -Relative).Substring(2)

            ## Create the Lucene document and add it to the index. Retain the path so that we can
            ## use it for quick searches later.
            $document = New-Object Lucene.Net.Documents.Document
            $document.Add( (New-Object Lucene.Net.Documents.Field "path", $indexPath, "YES","ANALYZED") )
            $document.Add( (New-Object Lucene.Net.Documents.Field "content", $content, "YES","ANALYZED") )
            $indexWriter.AddDocument($document)

            ## NOTE(review): a collection is forced after every document, presumably to
            ## keep memory bounded while indexing large files - confirm before removing.
            [GC]::Collect()

            $fileCounter++
        }

        ## Commit the index
        Write-Progress -Activity "Optimizing index"
        $indexWriter.Commit()
    }
    finally
    {
        ## Clean up. Guard against resources that were never created so that a failure
        ## while opening the index is not masked by a null-reference error here.
        if($null -ne $indexWriter)
        {
            $indexWriter.Dispose()
        }

        if($null -ne $indexDirectory)
        {
            $indexDirectory.Dispose()
        }

        [GC]::Collect()
    }
}

## Search the indexed database for a given query, optionally applying a
## regular expression to the matching files.
## Outputs file items (Get-Item) when no regular expression is given, otherwise
## the Select-String matches within each matching file.
function Search-ScourContent
{
    [CmdletBinding()]
    param(
        ## The query to use when searching
        [Parameter(Mandatory, Position = 0)]
        [String[]] $Query,

        ## The regular expression to apply to results, if any
        [Parameter()]
        [String] $RegularExpression,

        ## The file pattern to limit the search to, if any
        [Parameter()]
        $Path = "*"
    )

    ## Ensure they've created an index for the current location. Don't do this for them automatically,
    ## as it's likely to take a long time. Search parent directories if required. If the index is found
    ## in a parent directory, we will use the current subdirectory as a filter for results.
    ## The walk stops as soon as a directory has no parent, so it always terminates,
    ## whatever the current drive reports as its root.
    $scourRoot = $pwd.Path
    while(-not (Test-Path -LiteralPath "$scourRoot\__scour"))
    {
        $parent = Split-Path -Path $scourRoot -Parent
        if([String]::IsNullOrEmpty($parent) -or ($parent -eq $scourRoot))
        {
            break
        }

        $scourRoot = $parent
    }

    ## If we couldn't find the index, throw an error.
    if(-not (Test-Path -LiteralPath "$scourRoot\__scour"))
    {
        $PSCmdlet.ThrowTerminatingError(
            (New-Object System.Management.Automation.ErrorRecord `
                "Scour has not yet analyzed the current directory or any of its parents. To create a Scour index, run Initialize-ScourIndex.",
                "NoIndexForCurrentDirectory",
                "OpenError",
                $pwd))
    }

    ## Retain the searchers and index directories in the module scope so that we don't
    ## have to re-open the indexes for every search.
    if(-not $SCRIPT:searchers)
    {
        $SCRIPT:searchers = @{}
        $SCRIPT:indexDirectories = @{}
    }

    ## If we haven't created the searcher for this location yet, create it now.
    if(-not $SCRIPT:searchers.ContainsKey($scourRoot))
    {
        Write-Verbose "Getting new searcher"
        $indexDirectory = [Lucene.Net.Store.FSDirectory]::Open("$scourRoot\__scour")
        $SCRIPT:searchers[$scourRoot] = New-Object Lucene.Net.Search.IndexSearcher ([Lucene.Net.Index.IndexReader]::Open($indexDirectory, $true))
        $SCRIPT:indexDirectories[$scourRoot] = $indexDirectory
    }

    ## Parse the user's query
    $searcher = $SCRIPT:searchers[$scourRoot]
    $analyzer = New-Object Lucene.Net.Analysis.Standard.StandardAnalyzer "LUCENE_CURRENT"
    $parser = New-Object Lucene.Net.QueryParsers.QueryParser "LUCENE_CURRENT","content",$analyzer
    $queryObject = $parser.Parse($Query)

    ## Collect the search results
    $collector = [Lucene.Net.Search.TopScoreDocCollector]::Create($searcher.MaxDoc, $true)
    $searcher.Search($queryObject, $collector)

    ## Results are limited to files below the current location. Compare against the
    ## current path with a trailing separator so that a sibling directory whose name
    ## merely starts with the current directory's name (foo vs. foobar) is not included.
    $currentPrefix = $pwd.Path
    if(-not ($currentPrefix.EndsWith('\') -or $currentPrefix.EndsWith('/')))
    {
        $currentPrefix += [System.IO.Path]::DirectorySeparatorChar
    }

    ## Go through the search results
    $collector.TopDocs().ScoreDocs | Foreach-Object Doc | Get-Unique | Foreach-Object {

        $indexPath = $searcher.Doc($_).Get("path")
        $indexPath = Join-Path $scourRoot $indexPath

        ## Ordinal, case-insensitive comparison: the casing of the current location may
        ## differ from the casing recorded in the index.
        if($indexPath.StartsWith($currentPrefix, [System.StringComparison]::OrdinalIgnoreCase))
        {
            if($indexPath -like $Path)
            {
                if(-not $RegularExpression)
                {
                    Get-Item -LiteralPath $indexPath
                }
                else
                {
                    Select-String -LiteralPath $indexPath -Pattern $RegularExpression
                }
            }
        }
    }
}