Private/Get-VolkskrantArticle.ps1
function Get-VolkskrantArticle
{
    <#
    .SYNOPSIS
    Downloads the articles linked from a page and returns one object per article.

    .DESCRIPTION
    Requests -Uri, takes the href of every link on that page, keeps the hrefs
    accepted by -UrlFilter (excluding the copyright and colophon pages), and
    downloads each remaining page once. From every downloaded page it extracts
    the value of the "article:published_time" meta tag, the text of all h1
    elements and the text of the paragraphs inside the first matched section.

    A page that cannot be downloaded or parsed is reported with Write-Error and
    skipped, so no object is ever emitted with values left over from a
    previously processed page.

    Depends on ConvertTo-HtmlDocument and Select-HtmlNode, which are not
    defined in this file.

    .PARAMETER Uri
    Address of the page whose links are scanned. Relative hrefs found on the
    page are resolved against this address.

    .PARAMETER UrlFilter
    Script block evaluated once per raw href (available as $_). Only hrefs for
    which it returns a truthy value are downloaded. Defaults to hrefs that
    contain '/nieuws-achtergrond/' or '/cultuur-media/'.

    .OUTPUTS
    UncommonSense.Volkskrant.Article
    Properties: Url, Date ([DateTime], or $null when no parsable date was
    found), DateText, Title, Body.
    #>
    param
    (
        [Parameter(Mandatory)]
        [string]$Uri,

        [ScriptBlock]$UrlFilter = { $_ -like '*/nieuws-achtergrond/*' -or $_ -like '*/cultuur-media/*' }
    )

    # The publication date is machine-readable yyyy-MM-dd text, so it is parsed
    # with the invariant culture rather than a locale that may not be installed.
    $Culture = [System.Globalization.CultureInfo]::InvariantCulture
    $BaseUri = [System.Uri]$Uri

    # Trailing pipes continue the pipeline on every PowerShell version; no
    # backticks or line-leading pipes are needed.
    Invoke-WebRequest -Uri $Uri |
        Select-Object -ExpandProperty Links |
        Select-Object -ExpandProperty HRef |
        Where-Object -FilterScript $UrlFilter |
        Where-Object { $_ -notlike '*copyright-auteursrechten-volkskrant*' } |
        Where-Object { $_ -notlike '*het-colofon-van-de-volkskrant*' } |
        ForEach-Object {
            # Hrefs that already carry a scheme are passed through untouched;
            # anything else is treated as relative to the requested page.
            # NOTE(review): redirects and <base> tags are not taken into account.
            if ($_ -match '^[a-z][a-z0-9+.\-]*:')
            {
                $_
            }
            else
            {
                [System.Uri]::new($BaseUri, [string]$_).AbsoluteUri
            }
        } |
        # The same article is often linked more than once on a page; download
        # and emit it only once.
        Select-Object -Unique |
        ForEach-Object {
            $Url = $_

            # This script block shares its variables across iterations. Without
            # the try/catch a failing statement would be skipped and the object
            # below would be built from the previous article's values.
            try
            {
                $Content = Invoke-WebRequest -Uri $Url -ErrorAction Stop |
                    Select-Object -ExpandProperty Content
                $Document = ConvertTo-HtmlDocument -Text $Content

                # The meta tag may be absent; only read its attribute when the
                # node was actually found.
                $PublishedNode = $Document |
                    Select-HtmlNode -XPath '//meta[@property="article:published_time"]'
                $DateText = ''
                if ($PublishedNode)
                {
                    # Keep only the date part that precedes the 'T' separator.
                    $DateText = ($PublishedNode.GetAttributeValue('content', '') -split 'T')[0]
                }

                # TryParseExact reports failure instead of throwing; a missing
                # or malformed date yields $null.
                $Date = [DateTime]::MinValue
                $Parsed = [DateTime]::TryParseExact(
                    $DateText,
                    'yyyy\-MM\-dd',
                    $Culture,
                    [System.Globalization.DateTimeStyles]::None,
                    [ref]$Date)
                if (-not $Parsed)
                {
                    $Date = $null
                }

                # Null/empty values are dropped before Trim() so that a page
                # without h1 elements does not raise a null-method error.
                $TitleParts = ($Document | Select-HtmlNode -CssSelector 'h1' -All).InnerText |
                    Where-Object { $_ } |
                    ForEach-Object { $_.Trim() } |
                    Where-Object { $_ }
                $Title = $TitleParts -join ' '

                # Paragraphs whose class is exactly 'artstyle__container__text'
                # are excluded from the body.
                $Paragraphs = $Document |
                    Select-HtmlNode -CssSelector 'section' |
                    Select-HtmlNode -CssSelector 'p' -All |
                    Where-Object { $_.GetAttributeValue('class', '') -ne 'artstyle__container__text' }

                $BodyParts = ($Paragraphs).InnerText |
                    Where-Object { $_ } |
                    ForEach-Object { $_.Trim() } |
                    Where-Object { $_ }
                $Body = $BodyParts -join ' '

                [PSCustomObject][Ordered]@{
                    PSTypeName = 'UncommonSense.Volkskrant.Article'
                    Url        = $Url
                    Date       = $Date
                    DateText   = $DateText
                    Title      = $Title
                    Body       = $Body
                }
            }
            catch
            {
                Write-Error -Message "Skipping article '$Url': $($_.Exception.Message)"
            }
        }
}