public/ConvertTo-DbaiMarkdown.ps1
function ConvertTo-DbaiMarkdown { <# .SYNOPSIS Converts various files to Markdown format using AI assistance. .DESCRIPTION This command converts various filetypes (PDF, Word) to Markdown format using an AI assistant. It supports processing multiple files through pipeline input and can check for required content in the output. .PARAMETER Path Specifies the path to the file(s) to be converted. Accepts pipeline input. .PARAMETER RequiredText An array of strings that must be present in the output. If any of these strings are missing, the function will request the AI to try again. .PARAMETER Raw If specified, outputs only the Markdown content without additional metadata. .PARAMETER Retry Specifies the number of times to retry when a required phrase is not found. Default is 1. .EXAMPLE PS C:\> ConvertTo-DbaiMarkdown -Path C:\Documents\file.pdf Converts the specified PDF file to Markdown format. .EXAMPLE PS C:\> Get-ChildItem -Path C:\Documents -Filter *.pdf | ConvertTo-DbaiMarkdown Converts all PDF files in the specified directory to Markdown format. .EXAMPLE PS C:\> ConvertTo-DbaiMarkdown -Path C:\Documents\file.jpg -Raw Converts text in the specified jpg file to Markdown format and outputs only the content. .EXAMPLE PS C:\> ConvertTo-DbaiMarkdown -Path C:\Documents\file.pdf -RequiredText "lyme disease", "vaccination" -Retry 3 Converts the specified PDF file to Markdown format, ensuring that the phrases "lyme disease" and "vaccination" are present in the output. It will retry up to 3 times for each phrase if not found. #> [CmdletBinding()] param ( [Parameter(ValueFromPipeline, ValueFromPipelineByPropertyName)] [Alias("FullName")] [string[]]$Path = (Join-Path $script:ModuleRootLib -ChildPath immunization.pdf), [string[]]$RequiredText, [switch]$Raw, [int]$Retry = 1 ) begin { Write-Verbose "Starting ConvertTo-DbaiMarkdown function" $PSDefaultParameterValues['Write-Progress:Activity'] = "Converting file text to markdown" Write-Verbose "Creating AI Assistant" $assistantName = "TextExtractor" $instructionsfile = Join-Path -Path $script:ModuleRootLib -Childpath instruct-markdown.txt $assistantInstructions = Get-Content $instructionsfile -Raw try { $splat = @{ Name = $assistantName Instructions = $assistantInstructions Model = "gpt-4o-2024-08-06" UseCodeInterpreter = $true } $assistant = New-Assistant @splat Write-Verbose "AI Assistant created successfully with ID: $($assistant.id)" } catch { Write-Verbose "Failed to create AI Assistant: $PSItem" throw "Failed to create AI Assistant: $PSItem" } Write-Verbose "Creating Thread" try { $thread = New-Thread Write-Verbose "Thread created successfully with ID: $($thread.id)" } catch { Write-Verbose "Failed to create Thread: $PSItem" throw "Failed to create Thread: $PSItem" } $totalFiles = 0 $processedFiles = 0 } process { $totalFiles += $Path.Count Write-Verbose "Total files to process: $totalFiles" foreach ($filePath in $Path) { $processedFiles++ try { $filename = (Get-ChildItem -Path $filePath -ErrorAction Stop).Name } catch { throw "File not found: $filePath" continue } Write-Verbose "Processing file $processedFiles of $totalFiles -- $filePath" Write-Progress -Status "Processing file $processedFiles of $totalFiles" -PercentComplete (($processedFiles / $totalFiles) * 100) try { Write-Verbose "Uploading file: $filePath" $file = Add-OpenAIFile -File $filePath -Purpose assistants Write-Verbose "File uploaded successfully with ID: $($file.id)" Write-Verbose "Waiting for file processing to complete" do { $fileStatus = Get-OpenAIFile -FileId $file.id Write-Verbose "Current file status: $($fileStatus.status)" if ($fileStatus.status -eq 'processed') { Write-Verbose "File processing completed" break } elseif ($fileStatus.status -in 'failed', 'cancelled') { throw "File processing $($fileStatus.status)" } Start-Sleep -Seconds 3 } while ($true) Write-Verbose "Adding message to thread" $splat = @{ ThreadId = $thread.id Message = "Filename: $filename" FileIdsForCodeInterpreter = $file.id } $null = Add-ThreadMessage @splat Write-Verbose "Starting thread run" $run = Start-ThreadRun -ThreadId $thread.id -Assistant $assistant.id | Wait-ThreadRun Write-Verbose "Thread run completed with status: $($run.status)" Write-Verbose "Processing run response" $response = Get-ThreadMessage -ThreadId $thread.id -RunId $run.id | Select-Object -Last 1 $result = [PSCustomObject]@{ FileName = $filename Content = $response.SimpleContent.Content } if ($result.Content.ToLower().StartsWith("failure")) { Write-Verbose "Failure detected in response. Starting retry run" $run = Start-ThreadRun -ThreadId $thread.id -Assistant $assistant.id | Wait-ThreadRun $response = Get-ThreadMessage -ThreadId $thread.id -RunId $run.id | Select-Object -Last 1 $result.Content = $response.SimpleContent.Content } if ($RequiredText) { Write-Verbose "Checking for required text phrases" foreach ($phrase in $RequiredText) { Write-Verbose "Checking for phrase: '$phrase'" $retryCount = 0 while ($result.Content -notmatch [regex]::Escape($phrase) -and $retryCount -lt $Retry) { $retryCount++ Write-Verbose "Required phrase '$phrase' not found in the output. Retry attempt $retryCount of $Retry" $message = "The output seems incomplete. Please try again and ensure all relevant information is included." Write-Verbose "Sending message to AI: $message" $null = Add-ThreadMessage -ThreadId $thread.id -Message $message Write-Verbose "Starting retry run" $run = Start-ThreadRun -ThreadId $thread.id -Assistant $assistant.id | Wait-ThreadRun Write-Verbose "Retry run completed with status: $($run.status)" Write-Verbose "Retrieving updated response" $response = Get-ThreadMessage -ThreadId $thread.id -RunId $run.id | Select-Object -Last 1 $result.Content = $response.SimpleContent.Content Write-Verbose "Updated content received" $result.Content | ConvertTo-Json -Depth 3 | Write-Verbose } if ($result.Content -notmatch [regex]::Escape($phrase)) { Write-Verbose "Required phrase '$phrase' still missing after $Retry retry attempts" throw "Failed to include required content after $Retry retry attempts: $phrase" } else { Write-Verbose "Required phrase '$phrase' found after $retryCount retry attempts" } } Write-Verbose "All required phrases have been checked" } Write-Verbose "Checking for failure once more" if ($result.Content.ToLower().StartsWith("failure")) { throw $result.Content } Write-Verbose "Outputting result" if ($Raw) { $result.Content } else { $result } } catch { throw "Failed to process file $filePath | $PSItem" } finally { if ($file) { Write-Verbose "Attempting to delete uploaded file: $($file.id)" try { $null = Remove-OpenAIFile -FileId $file.id Write-Verbose "File deleted successfully" } catch { Write-Warning "Failed to delete uploaded file: $PSItem" } } } } } end { Write-Verbose "Cleaning up resources" try { $null = Remove-Thread -ThreadId $thread.id Write-Verbose "Thread removed successfully" $null = Remove-Assistant -AssistantId $assistant.id Write-Verbose "Assistant removed successfully" } catch { Write-Warning "Failed to clean up resources: $PSItem" } Write-Verbose "ConvertTo-DbaiMarkdown function completed" } } |