Public/Get-TeletekstNews.ps1

function NormalizeTitle([string]$Text)
{
    # Remove leading whitespace
    $Text = $Text -replace '^\s*', ''

    # Remove trailing whitespace
    $Text = $Text -replace '\s*$',''

    # Normalize remaining text
    $Text = NormalizeText($Text)

    $Text
}

function NormalizeText([string]$Text)
{
    # Comma followed by non-whitepace, e.g. 'foo,baz'
    $Text = $Text -replace ',([\S])', ', $1'

    # Comma preceded by digit, followed by space and digit, e.g. '2, 2 liters'
    $Text = $Text -replace '(\d),\s(\d)', '$1,$2'

    # (Semi)colon followed by non-whitespace, e.g. 'foo:baz'
    $Text = $Text -replace '([:;])(\S)', '$1 $2'

    # Full stop followed by a letter, e.g. 'foo.baz'
    $Text = $Text -replace '\.([a-zA-Z])', '. $1'

    # Hyphen in compound words, e.g. 'Schengen-landen'
    $Text = $Text -replace '-\s', '-'

    # Times
    $Text = $Text -replace '(\d{0,2})\.\s+(\d{2})\suur', '$1.$2 uur'

    $Text
}

function GetTitle([string]$Content)
{
    $Content | pup '.doubleHeight text{}' --plain
}

function GetNewsContent([string]$Content)
{
    $Content `
    | ForEach-Object { ($_ -split "`n").Trim() } `
    | Where-Object { $_ } `
    | ForEach-Object { $_ -replace '<a [^>]*?>', '' -replace '</a>', '' } `
    | Select-Object -SkipLast 1 `
    | ForEach-Object { ($_ | pup 'span.cyan text{}' --plain) } `
    | ForEach-Object { $_.Trim() }
}

function Get-TeletekstNews
{
    param
    (
        [Parameter(Mandatory)]
        [ValidateSet('Domestic', 'Foreign')]
        [string[]]$Type
    )

    $Type.ForEach{
        $CurrentType = $_

        $PageRange = switch ($CurrentType)
        {
            'Domestic' { 104..124 }
            'Foreign' { 125..137 }
        }

        $CurrentPage = $PageRange[0]

        while ($CurrentPage -in $PageRange)
        {
            $PageData = Invoke-RestMethod -Uri "https://teletekst-data.nos.nl/json/$CurrentPage"

            [PSCustomObject]@{
                Type       = $CurrentType
                Page       = $CurrentPage
                DateTime   = Get-Date
                Title      = NormalizeTitle(GetTitle($PageData.Content))
                Link       = "https://nos.nl/teletekst#$($CurrentPage)"
                Content    = NormalizeText(GetNewsContent($PageData.Content) -join ' ')
                PSTypeName = 'UncommonSense.Teletekst.NewsStory'
            }

            $CurrentPage = $PageData.NextPage
        }
    }
}