private/Export-TokenChunk.ps1

function Export-TokenChunk {
    <#
    .SYNOPSIS
        Splits files into smaller chunks based on a token limit.
 
    .DESCRIPTION
        The Export-TokenChunk function reads files from a specified path and splits them into smaller chunks based on a specified token limit.
        It processes the files in chunks and exports the content to new files with specified prefixes.
 
    .PARAMETER Path
        The path to the files to be processed. Can be piped or provided directly.
 
    .PARAMETER TokenLimit
        The maximum number of tokens per chunk. Defaults to 8000.
 
    .PARAMETER OutputPath
        The directory where the chunk files will be saved. Defaults to the current directory.
 
    .PARAMETER OutputPrefix
        The prefix for the output chunk files. Defaults to "chunk-".
 
    .PARAMETER Recurse
        Switch to indicate if the command should process files in subdirectories recursively.
 
    .EXAMPLE
        PS C:\> Export-TokenChunk -Path "C:\Documents" -TokenLimit 5000 -OutputPath "C:\Chunks" -OutputPrefix "doc-"
 
        Processes files in "C:\Documents" and splits them into chunks of 5000 tokens each, saving the chunks in "C:\Chunks" with the prefix "doc-".
 
    .EXAMPLE
        PS C:\> Get-ChildItem -Path "C:\Logs" -File | Export-TokenChunk -TokenLimit 10000
 
        Processes all files in "C:\Logs" and splits them into chunks of 10000 tokens each, saving the chunks in the current directory.
 
    .EXAMPLE
        PS C:\> $splat = @{
                Path = "C:\Data"
                TokenLimit = 8000
                OutputPath = "C:\Output"
                OutputPrefix= "data-chunk-"
                Recurse = $true
            }
        PS C:\> Export-TokenChunk @splat
 
        Processes files in "C:\Data" and its subdirectories, splitting them into chunks of 8000 tokens each, saving the chunks in "C:\Output" with the prefix "data-chunk-".
 
#>

    [CmdletBinding()]
    param (
        [Parameter(Mandatory, ValueFromPipeline)]
        [psobject[]]$Path,
        [int]$TokenLimit = 8000,
        [string]$OutputPath = ".",
        [string]$OutputPrefix = "chunk-",
        [switch]$Recurse
    )
    begin {
        $files = New-Object System.Collections.ArrayList
    }
    process {
        if (-not $Path.FullName) {
            $Path = Get-ChildItem -Path $Path -File -Recurse:$Recurse
        }

        foreach ($file in $Path) {
            $null = $files.Add($file)
        }
    }
    end {
        $chunkNumber = 1
        $startIndex = 0
        $endIndex = 0

        $totalFiles = $files.Count
        $currentFile = 1

        while ($currentFile -le $totalFiles) {
            # progress bar
            Write-Progress -Activity "Exporting token chunks" -Status "Processing chunk $chunkNumber" -PercentComplete (($currentFile / $totalFiles) * 100)
            $tokenCount = 0
            $chunk = @()

            while ($tokenCount -lt $TokenLimit -and $currentFile -le $totalFiles) {
                $content = Get-Content $files[$currentFile - 1].FullName -Raw
                if ($content) {
                    $tokenInfo = Measure-TuneToken -InputObject $content
                    $tokenCount += $tokenInfo.TokenCount
                }
                $chunk += $files[$currentFile - 1]
                $currentFile++
            }

            $endIndex = $currentFile - 1
            $chunkFileName = Join-Path -Path $OutputPath -ChildPath "$OutputPrefix$($startIndex + 1)-$($endIndex).txt"
            $chunk | Get-Content | Set-Content $chunkFileName
            $startIndex = $endIndex
            $chunkNumber++
            Get-ChildItem $chunkFileName
        }
    }
}