scour.psm1
## On module removal, release the file locks on the index files by disposing
## every cached searcher's index reader and every cached index directory.
$MyInvocation.MyCommand.ScriptBlock.Module.OnRemove = {
    foreach($searcher in $SCRIPT:searchers.Values)
    {
        $searcher.IndexReader.Dispose()
        $searcher = $null
    }

    foreach($indexDirectory in $SCRIPT:indexDirectories.Values)
    {
        $indexDirectory.Dispose()
        $indexDirectory = $null
    }

    [GC]::Collect()
}

## Creates an index of the files in the current location, storing the index
## in the __scour subdirectory.
##
## Files matching -Path are discovered recursively and handed to background
## PowerShell instances (one per logical processor) through a concurrent queue.
## Each worker adds one Lucene document per file with three fields:
## "path" (relative to the current location), "content" and "hash".
function Initialize-ScourIndex
{
    [CmdletBinding()]
    param(
        ## The pattern to use for file indexing. Defaults to *.txt + common source extensions
        [string[]] $Path = ("*.ps1","*.psm1","*.cs","*.c","*.cpp","*.h","*.py","*.java","*.txt")
    )

    ## Initialise everything the finally block touches so that cleanup is safe
    ## even when setup fails part-way through.
    $indexRoot = $pwd.Path
    $indexDirectory = $null
    $indexWriter = $null
    $runspaces = @()

    try
    {
        ## Search-ScourContent caches a searcher per index root. Drop the cached
        ## entry for this location before rebuilding, otherwise it keeps the old
        ## index files open and later searches keep returning stale results.
        if(($null -ne $SCRIPT:searchers) -and $SCRIPT:searchers.ContainsKey($indexRoot))
        {
            $SCRIPT:searchers[$indexRoot].IndexReader.Dispose()
            $SCRIPT:searchers.Remove($indexRoot)

            if(($null -ne $SCRIPT:indexDirectories) -and $SCRIPT:indexDirectories.ContainsKey($indexRoot))
            {
                $SCRIPT:indexDirectories[$indexRoot].Dispose()
                $SCRIPT:indexDirectories.Remove($indexRoot)
            }
        }

        ## Open the index from the "__scour" subdirectory of the current location
        $indexDirectory = [Lucene.Net.Store.FSDirectory]::Open("$indexRoot\__scour")
        $analyzer = New-Object Lucene.Net.Analysis.Standard.StandardAnalyzer "LUCENE_CURRENT"
        $unlimited = [Lucene.Net.Index.IndexWriter+MaxFieldLength]::UNLIMITED
        $indexWriter = New-Object Lucene.Net.Index.IndexWriter $indexDirectory,$analyzer,$true,$unlimited

        ## Worker body. Runs forever: pulls file paths off the shared queue, indexes
        ## them, and publishes its running count in its own slot of $OutputProgress.
        ## The workers are stopped from the finally block below.
        $parallelScript = {
            param($IndexWriter, $InputQueue, $OutputProgress, $ThreadId)

            $processed = 0
            $file = ""

            while($true)
            {
                if($InputQueue.TryDequeue([ref] $file))
                {
                    $content = Get-Content -LiteralPath $file -Raw
                    $hash = Get-FileHash -LiteralPath $file | ForEach-Object Hash

                    ## -LiteralPath so that names containing wildcard characters
                    ## (such as [ and ]) resolve. Substring(2) strips the leading ".\".
                    $indexPath = (Resolve-Path -LiteralPath $file -Relative).Substring(2)

                    ## Create the Lucene document and add it to the index. Retain the path so that we can
                    ## use it for quick searches later.
                    $document = New-Object Lucene.Net.Documents.Document
                    $document.Add( (New-Object Lucene.Net.Documents.Field "path", $indexPath, "YES","ANALYZED") )
                    $document.Add( (New-Object Lucene.Net.Documents.Field "content", $content, "YES","ANALYZED") )
                    $document.Add( (New-Object Lucene.Net.Documents.Field "hash", $hash, "YES","NO") )
                    $IndexWriter.AddDocument($document)

                    $processed++
                    $OutputProgress[$ThreadId] = $processed
                }
                else
                {
                    Start-Sleep -m 100
                }
            }
        }

        ## One worker per logical processor. Environment.ProcessorCount is always a
        ## single integer, whereas the WMI query yields one value per CPU socket.
        $threads = [Environment]::ProcessorCount
        $runspaces = @(1..$threads | ForEach-Object {
            $rs = [PowerShell]::Create()
            $null = $rs.Runspace.SessionStateProxy.Path.SetLocation($indexRoot)
            $rs
        })

        $inputQueue = New-Object 'System.Collections.Concurrent.ConcurrentQueue[String]'
        $outputProgress = New-Object 'Int[]' $threads

        for($counter = 0; $counter -lt $threads; $counter++)
        {
            $worker = $runspaces[$counter]
            $null = $worker.AddScript($parallelScript)
            $null = $worker.AddParameter("IndexWriter", $indexWriter)
            $null = $worker.AddParameter("InputQueue", $inputQueue)
            $null = $worker.AddParameter("OutputProgress", $outputProgress)
            $null = $worker.AddParameter("ThreadId", $counter)
            $null = $worker.BeginInvoke()
        }

        ## Count the number of files so that we can get an accurate progress measurement
        Write-Progress -Activity "Collecting files for processing"
        $fileCount = 0

        ## Go through each of the file patterns and queue the matching files
        $Path | ForEach-Object {
            $extension = $_
            Write-Progress -Activity "Searching for $extension files"

            Get-ChildItem -File -Filter $extension -Recurse | ForEach-Object {
                $file = $_
                $fileCount++

                if(($fileCount % 1000) -eq 0)
                {
                    Write-Progress -Activity "Preparing $extension - $($file.Name) - collected $fileCount files"
                }

                $inputQueue.Enqueue($file.FullName)
            }
        }

        ## Wait until every queued file has been accounted for by a worker. An empty
        ## queue alone is not enough: files that were already dequeued may still be
        ## in the middle of being indexed.
        do
        {
            $totalProcessed = [int] ($outputProgress | Measure-Object -Sum).Sum

            ## Guard against dividing by zero when no files matched.
            $percent = 100
            if($fileCount -gt 0)
            {
                $percent = [int] [Math]::Min(100, $totalProcessed * 100 / $fileCount)
            }
            Write-Progress -Activity "Processing $totalProcessed of $fileCount" -PercentComplete $percent

            $pending = ($inputQueue.Count -gt 0) -or ($totalProcessed -lt $fileCount)

            ## Safety net so that we never wait forever on workers that have stopped.
            $workersAlive = @($runspaces | Where-Object { $_.InvocationStateInfo.State -eq 'Running' }).Count -gt 0

            if($pending -and (-not $workersAlive))
            {
                Write-Warning "Indexing workers stopped before all files were processed ($totalProcessed of $fileCount)."
            }

            $keepWaiting = $pending -and $workersAlive
            if($keepWaiting)
            {
                Start-Sleep -Seconds 2
            }
        } while($keepWaiting)

        ## Commit the index
        Write-Progress -Activity "Optimizing index"
        $indexWriter.Commit()
    }
    finally
    {
        ## Clean up. Stop the workers first so that none of them can touch the
        ## writer after it has been disposed, and skip anything that was never created.
        foreach($worker in $runspaces)
        {
            if($null -ne $worker)
            {
                $worker.Stop()
                $worker.Dispose()
            }
        }

        if($null -ne $indexWriter) { $indexWriter.Dispose() }
        if($null -ne $indexDirectory) { $indexDirectory.Dispose() }

        [GC]::Collect()
    }
}

## Search the indexed database for a given query, optionally applying a regular
## expression to the matching files.
##
## Outputs file items (Get-Item) for every matching file under the current
## location, or Select-String matches when -RegularExpression is supplied.
## Throws a terminating error ("NoIndexForCurrentDirectory") if neither the
## current location nor any of its parents contains a __scour index.
function Search-ScourContent
{
    [CmdletBinding()]
    param(
        ## The query to use when searching
        [Parameter(Mandatory, Position = 0)]
        [String[]] $Query,

        ## The regular expression to apply to results, if any
        [Parameter()]
        [String] $RegularExpression,

        ## The file pattern to limit the search to, if any
        [Parameter()]
        $Path = "*"
    )

    ## Ensure they've created an index for the current location. Don't do this for them automatically,
    ## as it's likely to take a long time. Search parent directories if required. If the index is found
    ## in a parent directory, we will use the current subdirectory as a filter for results.
    $scourRoot = $pwd.Path
    $driveRoot = $pwd.Drive.Root
    while($scourRoot -ne $driveRoot)
    {
        if(Test-Path -LiteralPath "$scourRoot\__scour") { break }

        $parent = (Resolve-Path -LiteralPath "$scourRoot\..").Path

        ## Stop once we cannot climb any further, so that the walk terminates even
        ## when the drive root never compares equal to the resolved path.
        if((-not $parent) -or ($parent -eq $scourRoot)) { break }
        $scourRoot = $parent
    }

    ## If we couldn't find the index, throw an error.
    if(-not (Test-Path -LiteralPath "$scourRoot\__scour"))
    {
        $PSCmdlet.ThrowTerminatingError(
            (New-Object System.Management.Automation.ErrorRecord `
                "Scour has not yet analyzed the current directory or any of its parents. To create a Scour index, run Initialize-ScourIndex.",
                "NoIndexForCurrentDirectory",
                "OpenError",
                $pwd))
    }

    ## Retain the searchers and index directories in the module scope so that we don't
    ## have to re-open the indexes for every search.
    if($null -eq $SCRIPT:searchers)
    {
        $SCRIPT:searchers = @{}
        $SCRIPT:indexDirectories = @{}
    }

    ## If we haven't created the searcher for this location yet, create it now.
    if(-not $SCRIPT:searchers.ContainsKey($scourRoot))
    {
        Write-Verbose "Getting new searcher"
        $indexDirectory = [Lucene.Net.Store.FSDirectory]::Open("$scourRoot\__scour")
        $SCRIPT:searchers[$scourRoot] = New-Object Lucene.Net.Search.IndexSearcher ([Lucene.Net.Index.IndexReader]::Open($indexDirectory, $true))
        $SCRIPT:indexDirectories[$scourRoot] = $indexDirectory
    }

    ## Parse the user's query. Multiple query strings are joined with a space
    ## explicitly rather than relying on the implicit array-to-string conversion.
    $searcher = $SCRIPT:searchers[$scourRoot]
    $analyzer = New-Object Lucene.Net.Analysis.Standard.StandardAnalyzer "LUCENE_CURRENT"
    $parser = New-Object Lucene.Net.QueryParsers.QueryParser "LUCENE_CURRENT","content",$analyzer
    $queryObject = $parser.Parse(($Query -join ' '))

    ## Collect the search results. Ask for at least one hit so that an empty
    ## index never requests a zero-sized collector.
    $maxHits = [Math]::Max(1, $searcher.MaxDoc)
    $collector = [Lucene.Net.Search.TopScoreDocCollector]::Create($maxHits, $true)
    $searcher.Search($queryObject, $collector)

    ## Only results below the current location are returned. Compare against the
    ## location *with* a trailing separator so that a sibling directory sharing the
    ## same name prefix (C:\src\foo vs. C:\src\foobar) is not mistaken for a child.
    $locationPrefix = $pwd.Path.TrimEnd('\') + '\'

    ## Go through the search results
    $collector.TopDocs().ScoreDocs | ForEach-Object Doc | Get-Unique | ForEach-Object {
        $indexPath = Join-Path $scourRoot ($searcher.Doc($_).Get("path"))

        $isUnderLocation = $indexPath.StartsWith($locationPrefix, [StringComparison]::OrdinalIgnoreCase)
        if($isUnderLocation -and ($indexPath -like $Path))
        {
            if(-not $RegularExpression)
            {
                Get-Item -LiteralPath $indexPath
            }
            else
            {
                Select-String -LiteralPath $indexPath -Pattern $RegularExpression
            }
        }
    }
}