# private/ConvertTo-TuneFile.ps1

function ConvertTo-TuneFile {
    <#
        .SYNOPSIS
            Converts files or module help to JSONL format for chatbot training.

        .DESCRIPTION
            This function takes various input types and converts them to JSONL format, one JSON document per line.

            For .txt and .pdf files, each line becomes an "assistant" message. For .csv files, each row is serialized
            to a compact JSON string and becomes an "assistant" message. The output is written to <basename>.jsonl in
            the same directory as the source file.

            For modules, the help of every command in the module is turned into user/assistant exchanges and written
            to <ModuleName>.jsonl in the current location.

            After a file is written, its content is piped to Measure-TuneToken and a summary object is returned.

            This command is under development and made me realize that they really mean it when they say that you gotta have good data for your training model. This probably will make bad data.

        .PARAMETER InputObject
            The input to be converted. Can be a file path, FileInfo object, or a module (PSModuleInfo).

            PSCustomObject input is recognized but not implemented yet; such input is skipped with a warning.

        .PARAMETER SystemContent
            The system message for the JSONL file.

        .PARAMETER Include
            When converting a module, the help sections to include in the JSONL file.

            Values include: Synopsis, Description, Parameters, Examples, All

        .PARAMETER ExcludeParameters
            When converting a module, exclude the specified parameters from the JSONL file.

        .PARAMETER IncludeSystemOnAll
            When converting a module, repeat the system message before every user/assistant exchange
            instead of only once at the start of each command's conversation.

        .OUTPUTS
            PSCustomObject with FileName, TokenCount, TrainingCost, InputUsageCost and OutputUsageCost
            for each JSONL file written.

        .EXAMPLE
            Import-Module dbatools
            Get-Module dbatools | ConvertTo-TuneFile -SystemContent "You are a friendly dbatools support chatbot and PowerShell expert who helps people find the commands they need"

            This example demonstrates how to convert a dbatools module into a Tune file, defining the chatbot's persona as a friendly dbatools support expert.

        .EXAMPLE
            ConvertTo-TuneFile -InputObject C:\path\to\file.txt -SystemContent 'You are a friendly dbatools and PowerShell expert who offers tech support to DBAs and Systems Engineers'

            This example shows how to convert a text file into a Tune file, defining the chatbot's persona as a friendly tech support expert for dbatools and PowerShell.

        .EXAMPLE
            ConvertTo-TuneFile -InputObject C:\path\to\file.csv -SystemContent 'You are a chatbot'

            This example demonstrates converting a CSV file into a Tune file, with a simple system content defining the chatbot's persona.
    #>

    [CmdletBinding(SupportsShouldProcess)]
    param (
        [Parameter(Mandatory, ValueFromPipeline)]
        [psobject[]]$InputObject,
        [Parameter(Mandatory)]
        [string]$SystemContent,
        [ValidateSet('Synopsis', 'Description', 'Parameters', 'Examples', 'All')]
        [string[]]$Include = @( 'Synopsis', 'Description' ),
        [string[]]$ExcludeParameters,
        [switch]$IncludeSystemOnAll
    )
    begin {
        if ($Include -contains 'All') {
            $Include = @('Synopsis', 'Description', 'Parameters', 'Examples')
        }

        # Serializes one conversation to a single JSON line. -Compress is passed explicitly
        # (rather than through $PSDefaultParameterValues) so the caller's session is not modified.
        $toJsonLine = {
            param($Messages)
            [ordered]@{ messages = @($Messages) } | ConvertTo-Json -Depth 3 -Compress
        }

        # Appends one user/assistant exchange to a conversation, optionally preceded by the system
        # message. The Count check avoids doubling the system message that opens every conversation.
        $addExchange = {
            param($Blocks, $Question, $Answer)
            if ($IncludeSystemOnAll -and $Blocks.Count -gt 1) {
                $null = $Blocks.Add([ordered]@{ role = 'system'; content = $SystemContent })
            }
            $null = $Blocks.Add([ordered]@{ role = 'user'; content = $Question })
            $null = $Blocks.Add([ordered]@{ role = 'assistant'; content = $Answer })
        }
    }
    process {
        foreach ($object in $InputObject) {
            # Each entry is @{ Path = <output file>; Lines = <JSON lines> }. It is filled by the
            # type-specific branch below and then written, measured and reported in one place.
            $targets = New-Object System.Collections.ArrayList

            # Determine the type of InputObject
            if ($object -is [System.IO.FileInfo]) {
                $type = "path"
            } elseif ($object -is [string] -and (Test-Path -Path $object -PathType Leaf)) {
                $type = "path"
            } elseif ($object.PSObject.TypeNames -match "PSCustomObject") {
                $type = "psobject"
            } elseif ($object.PSObject.TypeNames -match "PSModuleInfo") {
                $type = "module"
            } else {
                $type = "unsupported"
            }

            # Process based on the input type
            switch ($type) {
                "path" {
                    # Use the FileInfo as-is; resolve strings (which may contain wildcards) to files only.
                    $fileItems = if ($object -is [System.IO.FileInfo]) {
                        @($object)
                    } else {
                        @(Get-Item -Path $object -Force | Where-Object { -not $_.PSIsContainer })
                    }

                    foreach ($fileItem in $fileItems) {
                        Write-Verbose "Processing $($fileItem.FullName)"

                        # Reduce every supported format to a sequence of items, one per assistant message.
                        $contents = switch ($fileItem.Extension) {
                            '.txt' {
                                # Blank lines would produce empty assistant messages, so skip them.
                                Get-Content -LiteralPath $fileItem.FullName |
                                    Where-Object { -not [string]::IsNullOrWhiteSpace($_) }
                            }
                            '.csv' {
                                # Message content must be text, so each row is serialized explicitly.
                                Import-Csv -LiteralPath $fileItem.FullName |
                                    ForEach-Object { $_ | ConvertTo-Json -Compress }
                            }
                            '.pdf' {
                                Import-PDFFile $fileItem.FullName
                            }
                            default {
                                throw "File type $($fileItem.Extension) is not supported."
                            }
                        }

                        $jsonlContent = New-Object System.Collections.ArrayList
                        foreach ($item in $contents) {
                            # "$item" yields a plain string: Get-Content attaches note properties to its
                            # lines, which Windows PowerShell would otherwise serialize as an object.
                            $messages = @(
                                [ordered]@{ role = 'system'; content = $SystemContent },
                                [ordered]@{ role = 'assistant'; content = "$item" }
                            )
                            $null = $jsonlContent.Add((& $toJsonLine -Messages $messages))
                        }

                        $null = $targets.Add(@{
                                Path  = Join-Path $fileItem.DirectoryName "$($fileItem.BaseName).jsonl"
                                Lines = $jsonlContent
                            })
                    }
                }

                "psobject" {
                    # Placeholder: handling of PSCustomObjects is not implemented yet.
                    Write-Warning "PSCustomObject input is not supported yet; skipping."
                }

                "module" {
                    $jsonlContent = New-Object System.Collections.ArrayList
                    $commands = @(Get-Command -Module $object.Name)
                    $commandCount = $commands.Count
                    $activity = "Processing $commandCount commands"
                    $i = 0

                    foreach ($command in $commands) {
                        $commandname = $command.Name
                        Write-Verbose "Processing command: $commandname"
                        $i++
                        $progress = @{
                            Status          = "Processing $commandname"
                            Activity        = $activity
                            PercentComplete = (($i / $commandCount) * 100)
                        }
                        Write-Progress @progress
                        $commandHelp = Get-Help -Name $commandname

                        # Every conversation opens with the system message.
                        $messageBlocks = New-Object System.Collections.ArrayList
                        $null = $messageBlocks.Add([ordered]@{ role = 'system'; content = $SystemContent })

                        # For Synopsis
                        if ($Include -contains 'Synopsis') {
                            $synopsis = $commandHelp.Synopsis
                            if ($synopsis) {
                                Write-Verbose "Processing Command synopsis"
                                & $addExchange $messageBlocks "Can you tell me what the $commandname command does?" ($synopsis -join "`n")
                            } else {
                                Write-Verbose "No synopsis found for $commandname"
                            }
                        }

                        # For Description
                        if ($Include -contains 'Description') {
                            $description = $commandHelp.Description
                            if ($description) {
                                Write-Verbose "Processing description"
                                & $addExchange $messageBlocks "What is the $commandname command for?" ($description.Text -join "`n")
                            } else {
                                Write-Verbose "No description found for $commandname"
                            }
                        }

                        # For Parameters
                        if ($Include -contains 'Parameters') {
                            foreach ($param in $commandHelp.Parameters.Parameter) {
                                if ($param.name -in $ExcludeParameters) { continue }
                                Write-Verbose "Processing parameter: $($param.name)"
                                if ($param.description) {
                                    # Join all paragraphs; taking only the first one truncates longer descriptions.
                                    & $addExchange $messageBlocks "What is the $($param.name) parameter for the $commandname for?" ($param.description.Text -join "`n")
                                } else {
                                    Write-Verbose "No description found for $($param.name)"
                                }
                            }
                        }

                        # For Examples
                        if ($Include -contains 'Examples') {
                            foreach ($example in $commandHelp.Examples.Example) {
                                # Remarks can consist solely of empty entries; treat those as "no remarks".
                                $remarks = if ($example.remarks) { ($example.remarks.Text -join ' ').Trim() }
                                if ($remarks) {
                                    Write-Verbose "Processing example"
                                    & $addExchange $messageBlocks "I want to $remarks" $example.code
                                } else {
                                    Write-Verbose "No description found for $($example.code)"
                                }
                            }
                        }

                        $null = $jsonlContent.Add((& $toJsonLine -Messages $messageBlocks))
                    }

                    # Remove the progress bar once all commands are processed.
                    Write-Progress -Activity $activity -Completed

                    $null = $targets.Add(@{
                            Path  = Join-Path -Path $pwd -ChildPath "$($object.Name).jsonl"
                            Lines = $jsonlContent
                        })
                }

                "unsupported" {
                    throw "The type of InputObject is not supported."
                }
            }

            # Write, measure and report every file produced for this input. Doing this in one place
            # guarantees token information is computed for file inputs as well as modules.
            foreach ($target in $targets) {
                if (-not $PSCmdlet.ShouldProcess($target.Path, 'Write JSONL training file')) {
                    continue
                }

                $target.Lines -join "`n" | Set-Content -LiteralPath $target.Path
                $tokeninfo = Get-Content -LiteralPath $target.Path | Measure-TuneToken

                [PSCustomObject]@{
                    FileName        = Split-Path -Path $target.Path -Leaf
                    TokenCount      = $tokeninfo.TokenCount
                    TrainingCost    = $tokeninfo.TrainingCost
                    InputUsageCost  = $tokeninfo.InputUsageCost
                    OutputUsageCost = $tokeninfo.OutputUsageCost
                }
            }
        }
    }
}