public/Measure-TuneToken.ps1

function Measure-TuneToken {
    <#
    .SYNOPSIS
    This command measures the token usage of the provided text using the SharpToken library.
 
    .DESCRIPTION
    Measure-TuneToken takes an array of strings, combines them, and utilizes the SharpToken library to tokenize the combined string. It then returns the count of tokens as a PSCustomObject.
 
    .PARAMETER InputObject
    An array of strings to be tokenized. This parameter is mandatory and accepts pipeline input.
 
    .PARAMETER Model
    Specifies the model to use for tokenization. Defaults to 'cl100k_base'. Other valid values are 'r50k_base', 'p50k_base', and 'p50k_edit'.
 
    .EXAMPLE
    Measure-TuneToken -InputObject "Hello, world!" -Model cl100k_base
 
    This command will tokenize the string "Hello, world!" using the 'cl100k_base' model and return the token count as a PSCustomObject.
 
    .EXAMPLE
    Get-Content -Path C:\path\to\file.txt | Measure-TuneToken -Model p50k_base
 
    This command will read the content of file.txt, pass the content as a string to Measure-TuneToken, and return the token count as a PSCustomObject.
    #>


    [CmdletBinding()]
    param (
        [Parameter(Mandatory, ValueFromPipeline)]
        [Alias('String')]
        [string[]]$InputObject,
        [ValidateSet('cl100k_base', 'p50k_base', 'r50k_base', 'gpt4', 'gpt35turbo', 'textembeddingada002', 'codex', 'textdavinci002', 'textdavinci003', 'davinci')]
        [string]$Model = 'cl100k_base'
    )
    begin {
        if ($script:modelmapping.ContainsKey($Model)) {
            $encodingName = $script:modelmapping[$Model]
        } else {
            $encodingName = $Model
        }

        $combinedString = New-Object System.Text.StringBuilder
    }
    process {
        foreach ($string in $InputObject) {
            $null = $combinedString.Append($string)
        }
    }
    end {
        $encodingName = $Model.ToString()
        $encoding = [SharpToken.GptEncoding]::GetEncoding($encodingName)
        $encoded = $encoding.Encode($combinedString.ToString())
        $tokenCount = $encoded.Count

        # Pricing details based on the provided table
        $pricingDetails = @{
            'cl100k_base' = @{ Training = 0.0080; InputUsage = 0.0120; OutputUsage = 0.0160 }
            'p50k_base'   = @{ Training = 0.0060; InputUsage = 0.0120; OutputUsage = 0.0120 }
            'r50k_base'   = @{ Training = 0.0004; InputUsage = 0.0016; OutputUsage = 0.0016 }
        }

        $trainingCost = $tokenCount * ($pricingDetails[$encodingName].Training / 1000)
        $inputUsageCost = $tokenCount * ($pricingDetails[$encodingName].InputUsage / 1000)
        $outputUsageCost = $tokenCount * ($pricingDetails[$encodingName].OutputUsage / 1000)

        [PSCustomObject]@{
            TokenCount      = $tokenCount
            TrainingCost    = $trainingCost
            InputUsageCost  = $inputUsageCost
            OutputUsageCost = $outputUsageCost
        }
    }
}