# public/Compare-Embedding.ps1

function Compare-Embedding {
    <#
    .SYNOPSIS
    Ranks a collection of embeddings by their similarity to a query embedding.

    .DESCRIPTION
    The Compare-Embedding command takes a query (either raw text or an already computed embedding vector),
    scores every entry of a hashtable of embeddings against it, and returns the top N entries ordered from
    most to least similar.

    Every measure is reported so that a larger value means "more similar":
      Cosine            dot(q, e) / (|q| * |e|), or 0 when either vector has zero length.
      Euclidean         1 / (1 + distance).
      SquaredEuclidean  1 / (1 + distance squared).
      Pearson           (r + 1) / 2 where r is the correlation coefficient, or 0 when either vector is constant.

    Entries whose vector length differs from the query's are reported with a non-terminating error and
    skipped, because a score across mismatched dimensions is meaningless. Use -ErrorAction to control this.

    .PARAMETER Query
    Text to be converted to an embedding with Get-Embedding. When supplied, it takes precedence over QueryEmbedding.

    .PARAMETER QueryEmbedding
    The query embedding vector to search with. Used when Query is not supplied.

    .PARAMETER Embeddings
    A hashtable whose keys identify each entry and whose values are the embedding vectors to search through.

    .PARAMETER Top
    Optional. The number of top similar results to return. Default is 25.

    .PARAMETER Type
    The type of similarity measure to use. Options are 'Cosine', 'Euclidean', 'Pearson', 'SquaredEuclidean'. Default is 'Cosine'.

    .OUTPUTS
    PSCustomObject with two properties: Command (the hashtable key) and Similarity (the score).

    .EXAMPLE
    $queryEmbedding = Get-Embedding "brown fox"
    $embeddings = @{
        "The quick brown fox" = (Get-Embedding "The quick brown fox");
        "A lazy dog" = (Get-Embedding "A lazy dog");
        "A cunning fox" = (Get-Embedding "A cunning fox")
    }
    Compare-Embedding -QueryEmbedding $queryEmbedding -Embeddings $embeddings

    This command searches for the most similar text embeddings to the query "brown fox".

    .EXAMPLE
    $queryEmbedding = Get-Embedding "playful dog"
    $embeddings = @{}
    Get-Content "descriptions.txt" | ForEach-Object { $embeddings[$_] = Get-Embedding $_ }
    Compare-Embedding -QueryEmbedding $queryEmbedding -Embeddings $embeddings -Top 3

    This command builds a single hashtable of embeddings from a text file and returns the top 3 most similar results to the query.
    #>

    [CmdletBinding()]
    param(
        [string]$Query,
        [double[]]$QueryEmbedding,
        [Parameter(Mandatory)]
        [hashtable]$Embeddings,
        [int]$Top = 25,
        [ValidateSet('Cosine', 'Euclidean', 'Pearson', 'SquaredEuclidean')]
        [string]$Type = 'Cosine'
    )
    process {
        # Test for presence explicitly: PowerShell converts a one-element array to Boolean
        # by its element, so "-not @(0.0)" would wrongly report a valid vector as missing.
        $hasVector = ($null -ne $QueryEmbedding) -and ($QueryEmbedding.Length -gt 0)
        if (-not $Query -and -not $hasVector) {
            throw "You must specify either Query or QueryEmbedding"
        }

        if ($Query) {
            $QueryEmbedding = Get-Embedding -Text $Query
            if ($null -eq $QueryEmbedding -or $QueryEmbedding.Length -eq 0) {
                throw "Get-Embedding returned no embedding for the supplied Query"
            }
        }

        # Query-side statistics do not depend on the entry being scored, so compute them once.
        $dimension = $QueryEmbedding.Length
        $querySum = 0.0
        $querySquares = 0.0
        foreach ($value in $QueryEmbedding) {
            $querySum += $value
            $querySquares += $value * $value
        }
        $queryNorm = [Math]::Sqrt($querySquares)
        $queryMean = $querySum / $dimension
        $queryVariance = 0.0
        foreach ($value in $QueryEmbedding) {
            $queryDelta = $value - $queryMean
            $queryVariance += $queryDelta * $queryDelta
        }

        $similarities = New-Object System.Collections.ArrayList

        foreach ($key in $Embeddings.Keys) {
            $embeddingVector = $Embeddings[$key]

            # Indexing past the end of a shorter vector silently yields $null (treated as 0) and a longer
            # vector would be truncated; either way the score would be wrong, so skip the entry instead.
            $entryLength = @($embeddingVector).Count
            if ($entryLength -ne $dimension) {
                Write-Error "Embedding '$key' has $entryLength dimension(s) but the query has $dimension. The entry was skipped."
                continue
            }

            $similarity = 0

            switch ($Type) {
                'Cosine' {
                    $dotProduct = 0.0
                    $entrySquares = 0.0
                    for ($i = 0; $i -lt $dimension; $i++) {
                        $dotProduct += $QueryEmbedding[$i] * $embeddingVector[$i]
                        $entrySquares += $embeddingVector[$i] * $embeddingVector[$i]
                    }
                    $entryNorm = [Math]::Sqrt($entrySquares)
                    # A zero-length vector has no direction; leave the similarity at 0 rather than divide by zero.
                    if ($queryNorm -ne 0 -and $entryNorm -ne 0) {
                        $similarity = $dotProduct / ($queryNorm * $entryNorm)
                    }
                }
                'Euclidean' {
                    $distanceSquared = 0.0
                    for ($i = 0; $i -lt $dimension; $i++) {
                        $diff = $QueryEmbedding[$i] - $embeddingVector[$i]
                        $distanceSquared += $diff * $diff
                    }
                    # Map distance [0, inf) onto similarity (0, 1] so that larger still means closer.
                    $similarity = 1 / (1 + [Math]::Sqrt($distanceSquared))
                }
                'Pearson' {
                    $entrySum = 0.0
                    for ($i = 0; $i -lt $dimension; $i++) {
                        $entrySum += $embeddingVector[$i]
                    }
                    $entryMean = $entrySum / $dimension

                    $covariance = 0.0
                    $entryVariance = 0.0
                    for ($i = 0; $i -lt $dimension; $i++) {
                        $entryDelta = $embeddingVector[$i] - $entryMean
                        $covariance += ($QueryEmbedding[$i] - $queryMean) * $entryDelta
                        $entryVariance += $entryDelta * $entryDelta
                    }
                    # Correlation is undefined for a constant vector; leave the similarity at 0 in that case.
                    if ($queryVariance -ne 0 -and $entryVariance -ne 0) {
                        $correlation = $covariance / ([Math]::Sqrt($queryVariance) * [Math]::Sqrt($entryVariance))
                        # Rescale r from [-1, 1] to [0, 1].
                        $similarity = ($correlation + 1) / 2
                    }
                }
                'SquaredEuclidean' {
                    $distanceSquared = 0.0
                    for ($i = 0; $i -lt $dimension; $i++) {
                        $diff = $QueryEmbedding[$i] - $embeddingVector[$i]
                        $distanceSquared += $diff * $diff
                    }
                    $similarity = 1 / (1 + $distanceSquared)
                }
            }
            $null = $similarities.Add([PSCustomObject]@{ Command = $key; Similarity = $similarity })
        }

        $similarities | Sort-Object -Property Similarity -Descending | Select-Object -First $Top
    }
}