Private/Get-VolkskrantCartoon.ps1
function Get-VolkskrantCartoon
{
    <#
    .SYNOPSIS
    Scrapes a Volkskrant cartoon overview page and emits one article object per cartoon.

    .DESCRIPTION
    Downloads the page at -Uri, collects the cartoon image URLs and the date
    headings found in the HTML, and pairs them up positionally (first image with
    first date, and so on). Pairing stops at the shorter of the two lists.

    Date headings that carry no year are parsed as being in the current year; a
    heuristic then moves such a date back one year when it would otherwise jump
    to a later year than the preceding entry (except across a December ->
    January boundary).

    .PARAMETER Uri
    Address of the overview page to download.

    .PARAMETER Title
    Title copied verbatim into every emitted object.

    .OUTPUTS
    PSCustomObject with PSTypeName 'UncommonSense.Volkskrant.Article' and the
    properties Url, Date, Title and Body (Url and Body both hold the image URL).
    #>
    param
    (
        [Parameter(Mandatory)]
        [string]$Uri,

        [Parameter(Mandatory)]
        [string]$Title
    )

    $DutchCulture = New-Object -TypeName System.Globalization.CultureInfo -ArgumentList 'nl-NL'
    $DateFormats = [string[]]('d MMMM yyyy', 'd MMMM')
    $Response = Invoke-WebRequest -Uri $Uri

    # @( ) guarantees an array even when the pipeline yields zero or one item;
    # without it a single URL would be a bare string and no URL would be $null.
    $Images = @(
        $Response |
            Select-Object -ExpandProperty Images |
            Where-Object { $_.src -Like 'https://image.volkskrant.nl/*/afbeelding' -or $_.src -match '.*/\d+-\w+-\d{4}$' } |
            Select-Object -ExpandProperty src
    )

    $Dates = @(
        $Response |
            Select-Object -ExpandProperty Content |
            Select-String -Pattern '<h2 class="v2cotm0 _1uudmgm0">(.*?)</h2>' -AllMatches |
            Select-Object -ExpandProperty Matches |
            ForEach-Object { $_.Groups[1].Value } |
            # NOTE(review): the original pattern here was an invisible/garbled
            # whitespace token; assumed to be a non-breaking space (entity or
            # literal U+00A0) that must become a plain space - confirm.
            ForEach-Object { $_ -replace '&nbsp;|\u00A0', ' ' } |
            # Works around a malformed year seen in the page ("20224"); typo on the site?
            ForEach-Object { $_ -replace '20224', '2024' } |
            # The string[] formats overload of ParseExact requires the DateTimeStyles argument.
            ForEach-Object { [DateTime]::ParseExact($_, $DateFormats, $DutchCulture, [System.Globalization.DateTimeStyles]::None) }
    )

    $PairCount = [Math]::Min($Images.Count, $Dates.Count)
    $PreviousDate = [DateTime]::MinValue

    for ($Index = 0; $Index -lt $PairCount; $Index++)
    {
        $CurrentDate = $Dates[$Index]

        # Handle dates whose year component was missing and that were therefore
        # assumed to be in the current year: if that puts them in a later year
        # than the preceding entry, pull them back one year - unless the pair
        # genuinely straddles the turn of the year (December followed by January).
        $HasPrevious = $PreviousDate -ne [DateTime]::MinValue
        if ($HasPrevious -and
            $CurrentDate.Year -gt $PreviousDate.Year -and
            ($CurrentDate.Month -ne 1 -or $PreviousDate.Month -ne 12))
        {
            $CurrentDate = $CurrentDate.AddYears(-1)
        }

        [PSCustomObject][Ordered]@{
            PSTypeName = 'UncommonSense.Volkskrant.Article'
            Url        = $Images[$Index]
            Date       = $CurrentDate
            Title      = $Title
            Body       = $Images[$Index]
        }

        $PreviousDate = $CurrentDate
    }
}