Private/Get-TrouwArticle.ps1
function Get-TrouwArticle {
    <#
    .SYNOPSIS
    Downloads and parses the articles linked from a trouw.nl page.

    .DESCRIPTION
    Requests the page at -Uri, takes the href of every link found on it,
    optionally narrows those hrefs with -UriFilter, and then requests each
    remaining link in turn. For every article one object with the type name
    'UncommonSense.Trouw.Article' is written to the pipeline.

    The function pauses for one second after each article to avoid being
    rate-limited by the server (HTTP 429).

    .PARAMETER Uri
    Address of the page whose links are followed.

    .PARAMETER UriFilter
    Optional script block handed to Where-Object; it is evaluated once per
    href (available as $_). When omitted, every link on the page is followed.

    .OUTPUTS
    PSCustomObject (PSTypeName 'UncommonSense.Trouw.Article') with the
    properties Url, Date, Title and Body.
    #>
    param
    (
        [Parameter(Mandatory, Position = 0)]
        [string]$Uri,

        [Parameter()]
        [ScriptBlock]$UriFilter
    )

    # -UriFilter is optional, but Where-Object refuses a null -FilterScript.
    # Fall back to a filter that lets every link through.
    if ($null -eq $UriFilter) {
        $UriFilter = { $true }
    }

    # Article dates are written with Dutch month names, so they must be parsed
    # with the nl-NL culture regardless of the caller's current culture.
    $DutchCulture = New-Object -TypeName System.Globalization.CultureInfo -ArgumentList 'nl-NL'

    Invoke-WebRequest -Uri $Uri |
        Select-Object -ExpandProperty Links |
        Select-Object -ExpandProperty HRef |
        Where-Object -FilterScript $UriFilter |
        ForEach-Object {
            # Turn site-relative links ("/...") into absolute URLs.
            $Url = $_ -replace '^/', 'https://trouw.nl/'

            $Content = Invoke-WebRequest -Uri $Url | Select-Object -ExpandProperty Content

            # ConvertTo-HtmlDocument / Select-HtmlNode are not defined in this file;
            # presumably they come from an HTML parsing module loaded by the caller.
            $Document = ConvertTo-HtmlDocument -Text $Content

            # ParseExact rejects leading/trailing whitespace, so trim the node text
            # first. The string interpolation also turns a missing node ($null)
            # into an empty string, which ParseExact reports as a format error.
            $DateText = "$(($Document | Select-HtmlNode -CssSelector '.artstyle__production__date').InnerText)".Trim()
            $Date = [DateTime]::ParseExact($DateText, 'd MMMM yyyy', $DutchCulture)

            # Title and body: trim every text fragment, drop the empty ones and
            # join what is left with single spaces.
            $Title = (
                ($Document | Select-HtmlNode -CssSelector 'h1').InnerText |
                    ForEach-Object { $_.Trim() } |
                    Where-Object { $_ }
            ) -join ' '

            $Body = (
                ($Document | Select-HtmlNode -CssSelector '.artstyle__main p' -All).InnerText |
                    ForEach-Object { $_.Trim() } |
                    Where-Object { $_ }
            ) -join ' '

            [PSCustomObject]@{
                PSTypeName = 'UncommonSense.Trouw.Article'
                Url        = $Url
                Date       = $Date
                Title      = $Title
                Body       = $Body
            }

            Start-Sleep -Seconds 1 # Prevent nginx 429 error (too many requests)
        }
}