TisaneWeb.psm1

#Region '.\Private\Save-TisaneSettings.ps1' 0
Function Save-TisaneSettings([String] $settingName, $settingValue){
    # Stores $settingValue under the name $settingName as a user-scoped
    # environment variable (EnvironmentVariableTarget 'User').
    $userScope = [System.EnvironmentVariableTarget]::User
    [System.Environment]::SetEnvironmentVariable($settingName, $settingValue, $userScope)
}
#EndRegion '.\Private\Save-TisaneSettings.ps1' 4
#Region '.\Public\Excel-LawWeb.ps1' 0
function Excel-LawWeb{
  <#
  .SYNOPSIS
  Annotates the rows of an Excel spreadsheet with Tisane abuse and entity findings.

  .DESCRIPTION
  Opens the workbook at $path + $filename through Excel COM automation, reads the
  text in column 1 of rows 2..1000 of the first sheet, posts each non-empty text
  to https://api.tisane.ai/parse and writes the findings to columns 2-16 of the
  same row:
    2 criminal activity   3 crime domain      4 personal attacks   5 hate speech
    6 sexual advances     7 external contacts 8 people             9 organizations
    10 software           11 locations        12 time ranges       13 dates
    14 times              15 files            16 phones
  The annotated workbook is saved next to the input as 'out_' + $filename.
  A row whose request fails is reported with a warning and left untouched.

  .PARAMETER APIkey
  Value sent in the Ocp-Apim-Subscription-Key request header.

  .PARAMETER languageCode
  Language code placed in every request body (one language for the whole sheet).

  .PARAMETER path
  Directory of the input workbook; also receives the output workbook.

  .PARAMETER filename
  File name of the input workbook inside $path.
  #>
  [CmdletBinding()]
Param(
     [Parameter(Mandatory = $true, valueFromPipeline=$true, HelpMessage="Tisane API key: ")][String] $APIkey, # from https://dev.tisane.ai/developer
     [Parameter(Mandatory = $true, valueFromPipeline=$true, HelpMessage="Language code: ")][String] $languageCode, # assuming the spreadsheet is monolingual
     [Parameter(Mandatory = $true, valueFromPipeline=$true, HelpMessage="Spreadsheet path: ")][String] $path, # for both the input and the output
     [Parameter(Mandatory = $true, valueFromPipeline=$true, HelpMessage="Spreadsheet local name: ")][String] $filename # input spreadsheet
)

  # Replace backslashes with forward slashes if they exist
  $path = $path -replace '\\', '/'

  # Add a forward slash to the end of the path if it's missing.
  # (A plain suffix test: '*/' is a wildcard pattern, not a valid regex for -match.)
  if (-not $path.EndsWith('/')) {
    $path = $path + '/'
  }

  $SPREADSHEET_PATHNAME = "$path$filename"
  $outFilename = $path + 'out_' + $filename
  $ROW_COUNT_IN_SPREADSHEET = 1000

  $header = @{}
  $header.Add('Ocp-Apim-Subscription-Key', "$APIkey")

  # Row 1 is skipped (processing starts at row 2).
  $startLine = 2
  $endLine = $ROW_COUNT_IN_SPREADSHEET

  # Appends $name to a ' / '-separated list, or starts the list when it is empty.
  $appendName = {
    param($existing, $name)
    if ($existing) { $existing + ' / ' + $name } else { $name }
  }

  $Excel = New-Object -ComObject Excel.Application
  $Workbook = $null
  try {
    $Workbook = $Excel.Workbooks.Open($SPREADSHEET_PATHNAME)
    $srcSheet = $Workbook.Sheets.Item(1)

    For ($i=$startLine; $i -le $endLine; $i++) {
      $content = $srcSheet.Cells.Item($i,1).Text
      if (-not $content) {continue}

      $pct = (($i - $startLine) / ($endLine - $startLine)) * 100
      Write-Progress -Activity "[$i] $content" -Status "$pct% complete" -PercentComplete $pct

      # Serialize with ConvertTo-Json so quotes, backslashes and line breaks in
      # the cell text are escaped instead of corrupting the request body.
      $inJsonBody = [ordered]@{
        language = $languageCode
        content  = $content
        settings = [ordered]@{
          deterministic   = $true
          format          = 'dialogue'
          sentiment       = $false
          snippets        = $true
          entities        = $true
          topic_standard  = 'native'
          optimize_topics = $true
        }
      } | ConvertTo-Json -Compress

      # NOTE(review): -SkipCertificateCheck disables TLS certificate validation while
      # the API key is being transmitted. Background on the original HTTPS problem:
      # https://stackoverflow.com/questions/11696944/powershell-v3-invoke-webrequest-https-error
      # Confirm it is still required before keeping it.
      try {
        # The body is sent as explicit UTF-8 bytes so non-ASCII text is not re-encoded.
        $parsedTisane = Invoke-RestMethod -Uri "https://api.tisane.ai/parse" -Method POST -Headers $header -Body ([System.Text.Encoding]::UTF8.GetBytes($inJsonBody)) -ContentType 'application/json; charset=utf-8' -UseBasicParsing -SkipCertificateCheck
      } catch {
        # Without this, $parsedTisane would still hold the previous row's result
        # and that stale result would be written to the current row.
        Write-Warning "[$i] Tisane request failed: $($_.Exception.Message)"
        continue
      }

      $crimeDomain = ''
      $criminalActivity = ''
      $personalAttacks = ''
      $hateSpeech = ''
      $contacts = ''
      $contactDetails = ''
      $sexualAdvances = ''
      $people = ''
      $locations = ''
      $time_ranges = ''
      $dates = ''
      $times = ''
      $files = ''
      $phones = ''
      $orgs = ''
      $software = ''

      if ($parsedTisane.abuse) {
        foreach ($abuse in $parsedTisane.abuse) {
          $abuseText = $abuse.text
          $abuseTags = $abuse.tags
          switch ($abuse.type) {
            'criminal_activity' {
              $crimePrefix = ''
              if ($abuseTags) {
                foreach ($tag in $abuseTags) {
                  # Both tags are excluded; the previous '-or' test was true for every tag.
                  if ($tag -ne 'addressee' -and $tag -ne 'quantitative') {
                    if ($crimePrefix) {
                      $crimePrefix = $crimePrefix + '/' + $tag
                    } else {
                      $crimePrefix = $tag
                    }
                    switch ($tag) {
                      'scam' { $crimeDomain = "fraud 🤥" }
                      'soft_drug' { $crimeDomain = "drugs 🌿" }
                      'hard_drug' { $crimeDomain = "drugs 💉" }
                      'medication' { $crimeDomain = "drugs 💊" }
                      'death' { $crimeDomain = "death 💀" }
                      'data' { $crimeDomain = "identity and data theft 💳" }
                    }
                  }
                }
              }

              # Fall back to the topics only when the tags did not decide the domain.
              if (-not $crimeDomain -and $parsedTisane.topics) {
                foreach ($topic in $parsedTisane.topics) {
                  switch ($topic) {
                    'narcotic' { $crimeDomain = "drugs 💉" }
                    'drug' { $crimeDomain = "drugs 💉" }
                    'soft drug' { $crimeDomain = "drugs 🌿" }
                    'hard drug' { $crimeDomain = "drugs 💉" }
                    'medication' { $crimeDomain = "drugs 💊" }
                    'threat' {$crimeDomain = "threat 👿"}
                    'planning' {$crimeDomain = "planning ✍"}
                    'sourcing' {$crimeDomain = "procurement 📰"}
                    'promotion' {$crimeDomain = "promotion 📢"}
                    'child abuse' {$crimeDomain = "child abuse 🚸"}
                    'animal' {$crimeDomain = "wildlife and poaching 🦏"}
                    'identity theft' {$crimeDomain = "identity and data theft 💳"}
                    'credit card' {$crimeDomain = "identity and data theft 💳"}
                    'firearm' {$crimeDomain = "firearms 🔫"}
                    'cryptocurrency' {$crimeDomain = "cryptocurrency ₿"}
                    'fraud' {$crimeDomain = "fraud 🤥"}
                    'explosive' {$crimeDomain = "explosives 💣"}
                    'explosive device' {$crimeDomain = "explosives 💣"}
                  }
                }
              }

              # Prefix the snippet with its tag list unless that list is already present.
              if ($crimePrefix -and $criminalActivity.IndexOf($crimePrefix) -lt 0) {
                $criminalActivity = $criminalActivity + ' [' + $crimePrefix + '] ' + $abuseText
              } else {
                $criminalActivity = $criminalActivity + ' ' + $abuseText
              }
            }
            'data' {
              $criminalActivity = $criminalActivity + ' ' + $abuseText
              $crimeDomain = $crimeDomain + " identity and data theft 💳"
            }
            'personal_attack' {
              $personalAttacks = $personalAttacks + " " + $abuseText
            }
            'bigotry' {
              $hateSpeech = $hateSpeech + " " + $abuseText
            }
            'sexual_advances' {
              $sexualAdvances = $sexualAdvances + " " + $abuseText
            }
            'external_contacts' {
              $contacts = $contacts + " " + $abuseText
            }
          }
        }
      }

      if ($parsedTisane.entities_summary) {
        foreach ($entity in $parsedTisane.entities_summary) {
          # An entity without a type cannot be classified (indexing a null type would raise an error).
          if (-not $entity.type) { continue }
          $entityName = $entity.name

          # The type tests below are kept exactly as they were written; -and and -or
          # share one precedence level, so the first condition is evaluated left to right.
          if ($entity.type -eq 'software' -or $entity.type[0] -eq 'software' -and $entity.type[1] -ne 'website' -and $entity.type[2] -ne 'website' -or $entity.type[1] -eq 'software' -and $entity.type[0] -ne 'website' -and $entity.type[2] -ne 'website') {
            $software = & $appendName $software $entityName
          }
          elseif ($entity.type -eq 'place' -or $entity.type[0] -eq 'place' -or $entity.type[1] -eq 'place') {
            $locations = & $appendName $locations $entityName
          }
          elseif ($entity.type -eq 'organization' -or $entity.type[0] -eq 'organization' -or $entity.type[1] -eq 'organization') {
            $orgs = & $appendName $orgs $entityName
          }
          elseif ($entity.type -eq 'person' -or $entity.type -eq 'username') {
            $people = & $appendName $people $entityName
          }
          elseif ($entity.type -eq 'email' -or $entity.type -eq 'username') {
            # NOTE(review): 'username' is already claimed by the branch above, and
            # $contactDetails is collected but never written to the sheet - confirm intent.
            $contactDetails = & $appendName $contactDetails $entityName
          }
          else {
            switch ($entity.type) {
              'crypto' {
                if (-not($crimeDomain)) {
                  $crimeDomain = "cryptocurrency ₿"
                }
              }
              'time_range' { $time_ranges = & $appendName $time_ranges $entityName }
              'date' { $dates = & $appendName $dates $entityName }
              'time' { $times = & $appendName $times $entityName }
              'file' { $files = & $appendName $files $entityName }
              'phone' { $phones = & $appendName $phones $entityName }
            }
          }
        }
      }

      $srcSheet.Cells.Item($i,2).Value = $criminalActivity
      $srcSheet.Cells.Item($i,3).Value = $crimeDomain
      $srcSheet.Cells.Item($i,4).Value = $personalAttacks
      $srcSheet.Cells.Item($i,5).Value = $hateSpeech
      $srcSheet.Cells.Item($i,6).Value = $sexualAdvances
      $srcSheet.Cells.Item($i,7).Value = $contacts
      $srcSheet.Cells.Item($i,8).Value = $people
      $srcSheet.Cells.Item($i,9).Value = $orgs
      $srcSheet.Cells.Item($i,10).Value = $software
      $srcSheet.Cells.Item($i,11).Value = $locations
      $srcSheet.Cells.Item($i,12).Value = $time_ranges
      $srcSheet.Cells.Item($i,13).Value = $dates
      $srcSheet.Cells.Item($i,14).Value = $times
      $srcSheet.Cells.Item($i,15).Value = $files
      $srcSheet.Cells.Item($i,16).Value = $phones
    }

    Write-Progress -Activity "Almost done" -Status "Saving the spreadsheet"

    $Workbook.SaveAs($outFilename)
  }
  finally {
    # Always close the workbook and shut Excel down, including when a step above
    # failed, so that no Excel process started here is left running.
    if ($Workbook) {
      $Workbook.Close($false)
    }
    $Excel.Quit()
    [void][System.Runtime.InteropServices.Marshal]::ReleaseComObject([System.__ComObject]$Excel)
    [gc]::Collect()
    [gc]::WaitForPendingFinalizers()
    Remove-Variable Excel -ErrorAction SilentlyContinue
  }
}
#EndRegion '.\Public\Excel-LawWeb.ps1' 280
#Region '.\Public\Generate-FamilyCleanupTestCSV.ps1' 0
Function Generate-FamilyCleanupTestCSV{
    <#
    .SYNOPSIS
    Writes one "family cleanup" benchmarking CSV per language.

    .DESCRIPTION
    For every language, the lemmas of each noise family are downloaded from
    $baseUrl/lm/inflections. Then, for every family in $familyIDs, the family record
    ($baseUrl/lm/family) and its lemmas are downloaded and one CSV row is built that
    holds the family description and definition, the family lemmas, and a "noisy"
    list made of the de-duplicated family lemmas followed by the lemmas of one
    randomly chosen noise family. The rows are exported to
    TisaneFamilyCleanupTest_<language>.csv inside $path.

    .PARAMETER familyIDs
    IDs of the families to generate rows for.

    .PARAMETER languages
    Language codes; one CSV file is produced per code.

    .PARAMETER noiseFamilyIDs
    IDs of the families whose lemmas are used as noise.

    .PARAMETER path
    Directory that receives the CSV files (defaults to the current location).

    .PARAMETER baseUrl
    Base URL of the API (defaults to https://api.tisane.ai).
    #>
    [CmdletBinding()]
    Param(
       [Parameter(Mandatory = $true, HelpMessage="comma-delimited list of family IDs: ")][int[]] $familyIDs,
       [Parameter(Mandatory = $true, HelpMessage="comma-delimited list of language codes: ")][String[]] $languages,
       [Parameter(Mandatory = $true, HelpMessage="comma-delimited list of noise family IDs: ")][int[]] $noiseFamilyIDs,
       [Parameter(Mandatory = $false, HelpMessage="Path to save the CSV files (Default current directory: )")][String] $path = (Get-Location).Path,
       [Parameter(Mandatory = $false, HelpMessage="base url to tisane default (https://api.tisane.ai)")][String] $baseUrl = "https://api.tisane.ai"
       )
    $tisaneApiKey = Set-TisaneApiKey
    $headers = @{
        "Content-Type" = "application/json;charset=utf-8";
        "Ocp-Apim-Subscription-Key" = $tisaneApiKey
    }
    foreach ($language in $languages) {
        # A growable list avoids re-allocating an array on every appended row.
        $csvObjects = [System.Collections.Generic.List[object]]::new()
        # noise family ID -> ', '-joined, trimmed, de-duplicated lemmas of that family
        $allNoiseLexemes = @{}
        Write-Host "Fetching data for $language families" -ForegroundColor Green
        foreach ($noiseFamilyID in $noiseFamilyIDs) {
            $noiseUrl = "$baseUrl/lm/inflections?language=$language&family=$noiseFamilyID"
            $noiseResponse = Invoke-WebRequest -Uri $noiseUrl -Headers $headers -ContentType 'application/json; charset=utf-8'
            $noiseResponse = [System.Text.Encoding]::UTF8.GetString($noiseResponse.Content)
            # Trim first and de-duplicate afterwards, so values that differ only in
            # surrounding whitespace collapse; empty or missing lemmas are dropped.
            $noiseLexemes = @(
                ($noiseResponse | ConvertFrom-Json) |
                    ForEach-Object { "$($_.lemma)".Trim() } |
                    Where-Object { $_ } |
                    Select-Object -Unique
            )
            if ($noiseLexemes.Count -gt 0){
                $allNoiseLexemes[$noiseFamilyID] = $noiseLexemes -join ', '
            }
        }

        $i=0
        foreach ($familyID in $familyIDs) {
            $pct = $i/$familyIDs.Length *100
            Write-Progress -Activity "Processing family [$familyID]" -Status "$pct%" -PercentComplete $pct
            $i++
            $familyUrl = "$baseUrl/lm/family?id=$familyID"
            $familyResponse = Invoke-WebRequest -Uri $familyUrl -Headers $headers -ContentType 'application/json; charset=utf-8'
            $familyResponse = [System.Text.Encoding]::UTF8.GetString($familyResponse.Content) | ConvertFrom-Json
            $lexemesUrl = "$baseUrl/lm/inflections?language=$language&family=$familyID"
            $lexemesResponse = Invoke-WebRequest -Uri $lexemesUrl -Headers $headers -ContentType 'application/json; charset=utf-8'
            $lexemesResponse = [System.Text.Encoding]::UTF8.GetString($lexemesResponse.Content) | ConvertFrom-Json

            $lexemes = @($lexemesResponse | ForEach-Object { $_.lemma })

            # Pick the lemmas of one noise family at random (empty when none were found).
            if ($allNoiseLexemes.Count -gt 0){
                $randomNoiseFamilyID = Get-Random -InputObject @($allNoiseLexemes.Keys)
                $randomNoiseLexemes = $allNoiseLexemes[$randomNoiseFamilyID]
            }else{
                $randomNoiseLexemes = ""
            }

            # Noisy list = de-duplicated family lemmas + the noise lemmas. Joining the
            # parts once avoids a dangling ', ' when either side is empty.
            $noisyParts = @(
                $lexemes |
                    Where-Object { $_ } |
                    ForEach-Object { "$_".Trim() } |
                    Select-Object -Unique
            )
            if ($randomNoiseLexemes) {
                $noisyParts += $randomNoiseLexemes
            }
            $noisyLexemes = $noisyParts -join ', '

            # NOTE(review): this writes the noisy list to the function's output stream for
            # every family. It looks like leftover debugging output - confirm before removing.
            $noisyLexemes

            $row = [PSCustomObject]@{
                "Family_ID" = $familyID
                "Family_Description" = $familyResponse.description
                "Family_Definition" = $familyResponse.definition
                "Lexemes" = $lexemes -join ', '
                "Noisy_List_of_Lexemes" = $noisyLexemes
                "GenAI_Response" = $null
                "Cleaned_GenAI_Response" = $null
                "Score_for_Original_Input" = $null
                "Score_for_Noisy_Input" = $null
                "Gold_Standard_for_Lexemes" = "n/a"
                "Gold_Standard_for_Noisy_lexemes" = $randomNoiseLexemes
            }

            $csvObjects.Add($row)
        }

        # Build the path once so the file written and the file reported are the same.
        $csvPath = Join-Path -Path $path -ChildPath "TisaneFamilyCleanupTest_$language.csv"
        $csvObjects | Export-Csv -Path $csvPath -NoTypeInformation -Encoding UTF8
        Write-Host "Written $($csvObjects.Count) row(s) to $csvPath" -ForegroundColor Green
    }
}
# Generate-FamilyCleanupTestCSV -familyIDs 28309,31652,55783,25642,115289 -languages 'hi','ru','fr','id','he','ar' -noiseFamilyIDs 119113,114678,12445 -path "Source\Public\benchmarking"
# Generate-FamilyCleanupTestCSV -familyIDs 28309,31652,55783,25642,115289 -languages "hi" -noiseFamilyIDs 119113,114678,12445 -path "Source\Public\benchmarking"
#EndRegion '.\Public\Generate-FamilyCleanupTestCSV.ps1' 103
#Region '.\Public\Reset-TisaneSettings.ps1' 0
# Function Get-TisaneSettings($settingName, $defaultValue) {
# $settingValue = [Environment]::GetEnvironmentVariable($settingName, 'User')
# if ([string]::IsNullOrEmpty($settingValue)) {
# return $defaultValue
# } else {
# return $settingValue
# }
# }

Function Reset-TisaneSettings(){
    # Overwrites the stored 'TisaneApiKey' setting with $null via Save-TisaneSettings.
    $resetArguments = @{
        settingName  = 'TisaneApiKey'
        settingValue = $null
    }
    Save-TisaneSettings @resetArguments
}

#EndRegion '.\Public\Reset-TisaneSettings.ps1' 14
#Region '.\Public\Set-TisaneApiKey.ps1' 0
Function Set-TisaneApiKey(){
    # Returns the Tisane API key.
    # The key is read from the user-scoped 'TisaneApiKey' environment variable; when
    # that is empty the user is prompted for it and the entered key is saved with
    # Save-TisaneSettings. In both cases the key is passed to Validate-ApiKey before
    # it is returned. Throws "No api key provided" when the prompt is left empty.
    $settingName = 'TisaneApiKey'
    $apiRoot = "https://api.tisane.ai"

    $storedKey = [Environment]::GetEnvironmentVariable($settingName, 'User')
    if (-not [string]::IsNullOrEmpty($storedKey)) {
        # A key is already stored: validate it and hand it back.
        Validate-ApiKey -apiKey $storedKey -baseUrl $apiRoot
        return $storedKey
    }

    # Nothing stored yet: ask for a key, and persist it only after it validated.
    $enteredKey = Read-Host -Prompt 'Please enter your Tisane API Key'
    if ([string]::IsNullOrEmpty($enteredKey)) {
        Throw "No api key provided"
    }
    Validate-ApiKey -apiKey $enteredKey -baseUrl $apiRoot
    Save-TisaneSettings -settingName $settingName -settingValue $enteredKey
    return $enteredKey
}
#EndRegion '.\Public\Set-TisaneApiKey.ps1' 16
#Region '.\Public\Tisane-Web.ps1' 0
## =============================================================================
##
## This script sends a parse request to the Tisane API
##
## =============================================================================


## overcome the HTTPS error: https://stackoverflow.com/questions/11696944/powershell-v3-invoke-webrequest-https-error
function Tisane-Web{
    <#
    .SYNOPSIS
    Posts a request body to https://api.tisane.ai/parse and returns the web response.

    .DESCRIPTION
    The API key is obtained from Set-TisaneApiKey and sent in the
    Ocp-Apim-Subscription-Key header. The object returned by Invoke-WebRequest is
    written to the output unchanged.

    .PARAMETER body
    JSON request body. Optional: when omitted, the sample request declared as the
    parameter default is sent. (The parameter used to be mandatory, which made that
    default unreachable.)
    #>
    Param(
        [Parameter(Mandatory = $false,  HelpMessage= 'Enter request body, default : {"language": "en", "content": "buy ice", "settings": {"deterministic": true, "format": "dialogue", "parses": false, "sentiment": false, "words": false, "snippets":true, "entities": true, "topic_standard":"native", "optimize_topics":true}}')][String] $body = '{"language": "en", "content": "buy ice", "settings": {"deterministic": true, "format": "dialogue", "parses": false, "sentiment": false, "words": false, "snippets":true, "entities": true, "topic_standard":"native", "optimize_topics":true}}'
    )

    $tisaneApiKey = Set-TisaneApiKey
    $header = @{}
    $header.Add('Ocp-Apim-Subscription-Key', $tisaneApiKey)

    # The body is sent as explicit UTF-8 bytes so non-ASCII content is not re-encoded.
    $bodyBytes = [System.Text.Encoding]::UTF8.GetBytes($body)

    # NOTE(review): -SkipCertificateCheck disables TLS certificate validation while the
    # API key is being transmitted - confirm it is still required.
    $tisaneResponse = Invoke-WebRequest -Uri "https://api.tisane.ai/parse" -Method POST -Headers $header -Body $bodyBytes -ContentType 'application/json; charset=utf-8' -UseBasicParsing -SkipCertificateCheck

    $tisaneResponse
}
#EndRegion '.\Public\Tisane-Web.ps1' 24
#Region '.\Public\Validate-ApiKey.ps1' 0
Function Validate-ApiKey {
    <#
    .SYNOPSIS
    Checks an API key against two endpoints and throws when either check fails.

    .DESCRIPTION
    Sends a GET request to $baseUrl/languages and then to $baseUrl/lm/family?id=43344,
    both with the key in the Ocp-Apim-Subscription-Key header. Produces no output
    when both requests succeed with status 200.

    .PARAMETER apiKey
    The API key to check.

    .PARAMETER baseUrl
    Base URL of the API, without a trailing slash.
    #>
    Param(
        [Parameter(Mandatory = $true)][String] $apiKey,
        [Parameter(Mandatory = $true)][String] $baseUrl
    )

    $headers = @{
        "Content-Type" = "application/json;charset=utf-8";
        "Ocp-Apim-Subscription-Key" = $apiKey
    }

    $languageUrl = "$baseUrl/languages"
    $familyUrl = "$baseUrl/lm/family?id=43344"

    # The status checks sit outside the try blocks: a Throw raised inside the try
    # would be caught by its own catch and wrapped in the message a second time.
    try {
        $response = Invoke-WebRequest -Uri $languageUrl -Headers $headers -Method Get
    } catch {
        Throw "Invalid API key. Error message: $($_.Exception.Message)"
    }
    if ($response.StatusCode -ne 200) {
        Throw "Invalid API key. Error message: $($response.Content)"
    }

    try {
        $response = Invoke-WebRequest -Uri $familyUrl -Headers $headers -Method Get
    } catch {
        # Same wording as the status check below, so the failure names what was being tested.
        Throw "API key does not have access to LLMS. Error message: $($_.Exception.Message)"
    }
    if ($response.StatusCode -ne 200) {
        Throw "API key does not have access to LLMS. Error message: $($response.Content)"
    }
}
#EndRegion '.\Public\Validate-ApiKey.ps1' 33