public/Request-AITChatCompletion.ps1

function Request-AITChatCompletion {
    <#
    .SYNOPSIS
    Creates a chat completion using the specified model and input in the AI Toolkit API.
 
    .DESCRIPTION
    The Request-AITChatCompletion cmdlet sends a POST request to the AI Toolkit API to create a chat completion using the specified model and input. It includes various parameters with defaults to fine-tune the AI's response.
 
    .PARAMETER Model
    The name of the model to use for the chat completion.
 
    .PARAMETER Message
    The input message for the chat completion.
 
    .PARAMETER Temperature
    The sampling temperature for generating the completion. Higher values result in more random outputs. Default is 0.7.
 
    .PARAMETER MaxToken
    The maximum number of tokens to generate in the completion. Default is 100.
 
    .PARAMETER TopP
    The cumulative probability threshold for token sampling. Default is 1.0.
 
    .PARAMETER FrequencyPenalty
    The penalty factor for repeated tokens. Default is 0.0.
 
    .PARAMETER PresencePenalty
    The penalty factor for new tokens not present in the input. Default is 0.0.
 
    .PARAMETER NoStream
    Indicates whether to disable streaming of the response. By default, streaming is enabled.
 
    .PARAMETER Raw
    Returns the raw API response instead of parsed content. Raw is only for non-streaming requests.
 
    .EXAMPLE
    Request-AITChatCompletion -Model mistral-7b-v02-int4-cpu -Message "Hello, how are you?"
 
    This command creates a chat completion using the mistral-7b-v02-int4-cpu model with the input message "Hello, how are you?" using default parameter settings.
    #>

    [CmdletBinding()]
    param(
        [string]$Model = $script:mountedmodel,
        [Parameter(Mandatory)]
        [string]$Message,
        [double]$Temperature = 0.7,
        [int]$MaxToken = 100,
        [double]$TopP = 1.0,
        [double]$FrequencyPenalty = 0.0,
        [double]$PresencePenalty = 0.0,
        [switch]$NoStream,
        [switch]$Raw
    )
    process {
        Write-Verbose "Starting Request-AITChatCompletion"

        if (-not $Model) {
            Write-Error "No model is currently loaded. Use the Mount-AITModel cmdlet to load a model."
            return
        }

        Write-Verbose "Using model: $Model"

        $endpoint = "$script:AIToolkitBaseUrl/v1/chat/completions"
        Write-Verbose "Endpoint: $endpoint"

        $requestBody = @{
            model             = $Model
            messages          = @(
                @{
                    role    = "user"
                    content = $Message
                }
            )
            temperature       = $Temperature
            max_tokens        = $MaxToken
            top_p             = $TopP
            frequency_penalty = $FrequencyPenalty
            presence_penalty  = $PresencePenalty
            stream            = (-not $NoStream)
        }

        try {
            $splat = @{
                Uri         = $endpoint
                Method      = "POST"
                Body        = ($requestBody | ConvertTo-Json)
                ContentType = "application/json"
            }

            Write-Verbose "Sending request to AI Toolkit API"

            if ($NoStream) {
                Write-Progress -Activity "Requesting AI Chat Completion" -Status "Sending request..."
                $response = Invoke-RestMethod @splat
                Write-Progress -Activity "Requesting AI Chat Completion" -Status "Complete"

                if ($Raw) {
                    return $response
                } else {
                    # This is a simplified parsing. Adjust based on your actual response structure.
                    if ($response.choices -and $response.choices.Count -gt 0) {
                        return $response.choices[0].message.content
                    } else {
                        Write-Warning "Unexpected response structure"
                        return $response
                    }
                }
            } else {
                $responseStream = Invoke-WebRequest @splat -UseBasicParsing -ErrorAction Stop -TimeoutSec 0
                $reader = [System.IO.StreamReader]::new($responseStream.RawContentStream)
                $responseBuilder = [System.Text.StringBuilder]::new()

                while (-not $reader.EndOfStream) {
                    $line = $reader.ReadLine()
                    if ($line.StartsWith("data: ")) {
                        $data = $line.Substring(6)
                        if ($data -eq "[DONE]") {
                            break
                        }
                        
                        try {
                            $jsonData = $data | ConvertFrom-Json
                            $content = $jsonData.choices[0].delta.content
                            if ($content) {
                                Write-Host $content -NoNewline
                                $null = $responseBuilder.Append($content)
                            }
                        } catch {
                            Write-Error "Failed to parse JSON: $_"
                        }
                    }
                }
                return $responseBuilder.ToString()
            }
        } catch {
            throw $PSItem
        } finally {
            Write-Progress -Activity "Requesting AI Chat Completion" -Completed
        }
    }
}