Public/Get-TomGauld.ps1
function Get-TomGauld {
    <#
    .SYNOPSIS
    Retrieves Tom Gauld cartoon articles from his profile page on The Guardian.

    .DESCRIPTION
    Downloads the profile listing page, visits every distinct link that points
    to a 'books/picture' article and emits one object per article. The article
    HTML is parsed with the external `pup` command, which must be resolvable
    as a command in the current session.

    .PARAMETER All
    Keep following the listing's rel="next" link until no such link is found.
    Without this switch only the first listing page is processed.

    .OUTPUTS
    Objects tagged with the type name 'UncommonSense.TheGuardian.Article',
    carrying Url, Date, Title and Body (the image URL) properties.
    #>
    param (
        [switch]$All
    )

    $NextUrl = 'https://www.theguardian.com/profile/tom-gauld'

    do {
        # Clear the previous page's links first. If the request below fails,
        # the assignment is skipped; without this reset the stale links would
        # be processed again and, with -All, the same failing URL would be
        # selected as "next" over and over.
        $Links = $null
        $Links = Invoke-WebRequest -Uri $NextUrl | Select-Object -ExpandProperty Links

        $Links |
            Where-Object HRef -Like 'https://www.theguardian.com/books/picture/*' |
            Select-Object -ExpandProperty HRef -Unique |
            ForEach-Object {
                # Same reasoning as for $Links: never reuse the previous
                # article's HTML when this article's download fails.
                $ImagePageContent = $null
                $ImagePageContent = Invoke-WebRequest -Uri $_ | Select-Object -ExpandProperty Content
                if (-not $ImagePageContent) {
                    # Nothing to parse; skip to the next article link.
                    return
                }

                # srcset of the first <source>, with the trailing '1880w' width descriptor removed.
                $ImageUrl = ($ImagePageContent | pup 'source:first-of-type attr{srcset}' --plain) -replace '1880w$', ''

                # The <h1> text may arrive as several lines; trim each, drop blanks, join with spaces.
                $Title = ($ImagePageContent | pup 'h1 text{}' | ForEach-Object { $_.Trim() } | Where-Object { $_ }) -join ' '

                # Strip a trailing numeric UTC offset (e.g. +0000) so the value matches the 's' format.
                $DateText = ($ImagePageContent | pup 'time:first-of-type attr{datetime}') -replace '[+-](\d{4})$', ''

                [PSCustomObject][Ordered]@{
                    PSTypeName = 'UncommonSense.TheGuardian.Article'
                    Url        = $_
                    Date       = [DateTime]::ParseExact($DateText, 's', $null)
                    Title      = $Title -replace '\s–\scartoon$', ''
                    Body       = $ImageUrl
                }
            }

        if ($All) {
            # -First 1 keeps $NextUrl a single value even when the page carries
            # more than one rel="next" link; -Uri cannot bind an array.
            $NextUrl = $Links | Where-Object Rel -EQ Next | Select-Object -ExpandProperty HRef -First 1
        }
        else {
            $NextUrl = $null
        }
    } while ($NextUrl)
}