functions/connectors-serialization.ps1
function Test-CecObjectProperty {
    <#
    .SYNOPSIS
    Tells whether an object exposes a property with the given name.

    .DESCRIPTION
    Compares with -contains against the list of property names, so the match is
    always on the whole name. Calling .Contains() on that list instead degrades
    to a String substring test when the object has exactly one property, and
    fails with a null-method call when the object is $null.

    .PARAMETER InputObject
    Object to inspect; $null yields $false.

    .PARAMETER Name
    Property name to look for.

    .OUTPUTS
    System.Boolean
    #>
    param(
        $InputObject,
        [String]$Name
    )

    if ($null -eq $InputObject) {
        return $false
    }
    return ($InputObject.PSObject.Properties.Name -contains $Name)
}

function ConvertTo-CecTriggerUrlArray {
    <#
    .SYNOPSIS
    Forces the "urls" property of every direct child of a trigger to be an array.

    .DESCRIPTION
    Walks the trigger's own properties; for each property value that itself has
    a "urls" property, re-assigns it cast to [Array]. The trigger is modified in
    place and nothing is returned.

    .PARAMETER Trigger
    Trigger object to normalise.
    #>
    param(
        $Trigger
    )

    foreach ($propertyName in $Trigger.PSObject.Properties.Name) {
        $child = $Trigger.$propertyName
        if (-not (Test-CecObjectProperty -InputObject $child -Name "urls")) {
            continue
        }
        $child.urls = [Array]$child.urls
    }
}

function Write-CecConnector {
    <#
    .SYNOPSIS
    Serialises a connector to a folder as connector.json plus side files.

    .DESCRIPTION
    Creates the destination folder when missing, works on a JSON round-tripped
    copy of the connector, and writes it to "connector.json". Unless -SkipFiles
    is given, tagger sources and request triggers found under
    content.crawler.webCrawlerConfig / apiCrawlerConfig are exported to their
    own files through Write-Tagger and Write-Trigger, which replace the exported
    value in the copy with a placeholder string.

    .PARAMETER Connector
    Connector object to write. Accepted from the pipeline.

    .PARAMETER Path
    Destination folder.

    .PARAMETER Suffix
    Passed to SuffixIsSpecified, Remove-Suffix and Remove-CecConnectorPrefix
    (all defined outside this file section).

    .PARAMETER Prefix
    Passed to SuffixIsSpecified, Remove-Suffix and Remove-CecConnectorPrefix
    (all defined outside this file section).

    .PARAMETER Subfolder
    Write into a sub-folder of Path named after the result of Remove-Suffix
    applied to the connector name.

    .PARAMETER SkipFiles
    Do not export taggers/triggers to separate files.

    .PARAMETER RemoveDates
    Run the copy through Remove-CecConnectorUserDates before writing.
    #>
    [CmdletBinding()]
    param(
        [Parameter(ValueFromPipeline, Mandatory)]$Connector,
        [Parameter(Mandatory)][String]$Path,
        $Suffix = "",
        $Prefix = "",
        [Switch]$Subfolder,
        [Switch]$SkipFiles,
        [Switch]$RemoveDates
    )

    begin {
        $suffixesAreSpecified = SuffixIsSpecified -Prefix $Prefix -Suffix $Suffix
    }

    process {
        $destinationPath = $Path
        if ($Subfolder) {
            $destinationPath = Join-Path $destinationPath (Remove-Suffix -Value $Connector.name -Suffix $Suffix -Prefix $Prefix)
        }

        if ($Connector.PSObject.Properties.Name -contains "connectorId") {
            Write-Information ("Writing connector {0} ({1}) to {2}" -f $Connector.name, $Connector.connectorId, $destinationPath)
        }
        else {
            Write-Information ("Writing connector {0} to {1}" -f $Connector.name, $destinationPath)
        }

        if (-not (Test-Path $destinationPath -PathType Container)) {
            New-Item $destinationPath -ItemType Directory | Out-Null
        }

        # JSON round trip produces a detached copy: the export helpers below
        # overwrite properties with placeholders and must not touch $Connector.
        $obj = $Connector | ConvertTo-Json -Depth 15 | ConvertFrom-Json

        if (-not $SkipFiles) {
            # NOTE(review): prefix and date stripping only run when files are
            # exported; confirm that -SkipFiles is meant to disable them too.
            if ($suffixesAreSpecified) {
                $obj = Remove-CecConnectorPrefix -Connector $obj -Prefix $Prefix -Suffix $Suffix
            }
            if ($RemoveDates) {
                $obj = Remove-CecConnectorUserDates -Connector $obj
            }

            if (Test-CecObjectProperty -InputObject $obj.content -Name "crawler") {
                foreach ($crawlerType in @("webCrawlerConfig", "apiCrawlerConfig")) {
                    if (-not (Test-CecObjectProperty -InputObject $obj.content.crawler -Name $crawlerType)) {
                        continue
                    }
                    $crawlerConfig = $obj.content.crawler.$crawlerType

                    if (Test-CecObjectProperty -InputObject $crawlerConfig -Name "extractors") {
                        $docIndex = 0
                        foreach ($doc in $crawlerConfig.extractors.documents) {
                            foreach ($tagger in $doc.taggers) {
                                Write-Tagger -ConnectorPath $destinationPath -DocumentExtractor $doc -Index $docIndex -Tagger $tagger
                            }
                            $docIndex++
                        }
                    }

                    if (Test-CecObjectProperty -InputObject $crawlerConfig -Name "triggers") {
                        $triggerIndex = 0
                        foreach ($trigger in $crawlerConfig.triggers) {
                            Write-Trigger -ConnectorPath $destinationPath -BaseKey "trigger_${triggerIndex}" -Trigger $trigger
                            $triggerIndex++
                        }
                    }
                }
            }
        }

        Set-Content -Path (Join-Path $destinationPath "connector.json") -Value (ConvertTo-Json -InputObject $obj -Depth 15)
    }
}

function Read-CecConnector {
    <#
    .SYNOPSIS
    Loads a connector from a folder written by Write-CecConnector.

    .DESCRIPTION
    Parses "connector.json" from Path. Unless -SkipFiles is given, tagger
    sources and request triggers are read back from their side files through
    Read-Tagger and Read-Trigger. Writes an error and returns nothing when
    connector.json is missing.

    .PARAMETER Path
    Existing folder containing connector.json.

    .PARAMETER SkipFiles
    Return the parsed connector.json without reading the side files.

    .OUTPUTS
    The connector object.
    #>
    [CmdletBinding()]
    param(
        [ValidateScript({ Test-Path $_ -PathType Container })][Parameter(Mandatory)]
        [String]$Path,
        [Switch]$SkipFiles
    )

    $connectorPath = Join-Path $Path "connector.json"
    if (-not (Test-Path $connectorPath -PathType Leaf)) {
        Write-Error "Required file ${connectorPath}"
        return
    }

    $connector = Get-Content $connectorPath | ConvertFrom-Json

    # Same guard as Write-CecConnector: a connector may have no crawler section.
    if ((-not $SkipFiles) -and (Test-CecObjectProperty -InputObject $connector.content -Name "crawler")) {
        foreach ($crawlerType in @("webCrawlerConfig", "apiCrawlerConfig")) {
            if (-not (Test-CecObjectProperty -InputObject $connector.content.crawler -Name $crawlerType)) {
                continue
            }
            $crawlerConfig = $connector.content.crawler.$crawlerType

            if (Test-CecObjectProperty -InputObject $crawlerConfig -Name "extractors") {
                $docIndex = 0
                foreach ($doc in $crawlerConfig.extractors.documents) {
                    foreach ($tagger in $doc.taggers) {
                        Read-Tagger -ConnectorPath $Path -DocumentExtractor $doc -Index $docIndex -Tagger $tagger
                    }
                    $docIndex++
                }
            }

            if (Test-CecObjectProperty -InputObject $crawlerConfig -Name "triggers") {
                $triggerIndex = 0
                foreach ($trigger in $crawlerConfig.triggers) {
                    Read-Trigger -ConnectorPath $Path -BaseKey "trigger_${triggerIndex}" -Trigger $trigger
                    $triggerIndex++
                }
            }
        }
    }

    $connector
}

# Text appended to every exported tagger file by Write-Tagger and stripped
# again by Read-Tagger.
$taggerSuffix = "`nmodule.exports = { extract };`n"

function Write-Tagger {
    <#
    .SYNOPSIS
    Exports a tagger's source to "extractor_<Index>_<tag>.<type>".

    .DESCRIPTION
    When the tagger has a non-null "source" property, writes it (followed by
    $taggerSuffix) to the file and replaces the property with the placeholder
    "<exported to FILE>". Does nothing otherwise.

    .PARAMETER ConnectorPath
    Folder the file is written to.

    .PARAMETER Tagger
    Tagger object; its "tag" is used in the file name. Modified in place.

    .PARAMETER DocumentExtractor
    Owning document extractor; its "type" is used as the file extension.

    .PARAMETER Index
    Index of the document extractor, used in the file name.
    #>
    param(
        $ConnectorPath,
        $Tagger,
        $DocumentExtractor,
        $Index
    )

    $type = $DocumentExtractor.type
    $tag = $Tagger.tag
    $fileName = "extractor_${Index}_${tag}.${type}"
    $taggerPath = Join-Path $ConnectorPath $fileName

    if ((Test-CecObjectProperty -InputObject $Tagger -Name "source") -and $null -ne $Tagger.source) {
        # NOTE(review): "\r\n" and "\n" are literal backslash sequences in
        # PowerShell (the escape character is the backtick), so this turns the
        # two-character text \n into a real line feed. Confirm that is intended.
        $source = ($Tagger.source).Replace("\r\n", "`n").Replace("\n", "`n") + $taggerSuffix
        Set-Content -Value $source -Path $taggerPath
        $Tagger.source = "<exported to ${fileName}>"
    }
}

function Read-Tagger {
    <#
    .SYNOPSIS
    Loads a tagger's source back from "extractor_<Index>_<tag>.<type>".

    .DESCRIPTION
    When the file exists, sets Tagger.source to its trimmed content with the
    trimmed $taggerSuffix text removed and CRLF converted to LF. An empty file
    yields an empty source. Does nothing when the file is absent.

    .PARAMETER ConnectorPath
    Folder the file is read from.

    .PARAMETER Tagger
    Tagger object; its "tag" is used in the file name. Modified in place.

    .PARAMETER DocumentExtractor
    Owning document extractor; its "type" is used as the file extension.

    .PARAMETER Index
    Index of the document extractor, used in the file name.
    #>
    param(
        $ConnectorPath,
        $Tagger,
        $DocumentExtractor,
        $Index
    )

    $type = $DocumentExtractor.type
    $tag = $Tagger.tag
    $fileName = "extractor_${Index}_${tag}.${type}"
    $taggerPath = Join-Path $ConnectorPath $fileName

    if (Test-Path $taggerPath) {
        $raw = Get-Content $taggerPath -Raw
        if ($null -eq $raw) {
            # Get-Content -Raw returns $null for an empty file.
            $raw = ""
        }
        $Tagger.source = $raw.Trim().Replace($taggerSuffix.Trim(), "").Replace("`r`n", "`n")
    }
}

function Write-Trigger {
    <#
    .SYNOPSIS
    Exports a "request" trigger to "<BaseKey>_<type>.http".

    .DESCRIPTION
    For a trigger of type "request" with a non-null request, writes a text file
    made of a "METHOD URL" line (method defaults to GET), one "Key: value" line
    per header value, a blank line and the body, then replaces Trigger.request
    with the placeholder "<exported to FILE>". For any other trigger, only
    normalises child "urls" properties to arrays.

    .PARAMETER ConnectorPath
    Folder the file is written to.

    .PARAMETER BaseKey
    File name prefix.

    .PARAMETER Trigger
    Trigger object. Modified in place.
    #>
    param(
        $ConnectorPath,
        $BaseKey,
        $Trigger
    )

    $type = $Trigger.type
    if ($type -ne "request" -or $null -eq $Trigger.request) {
        ConvertTo-CecTriggerUrlArray -Trigger $Trigger
        return
    }

    $request = $Trigger.request
    $fileName = "${BaseKey}_${type}.http"
    $filePath = Join-Path $ConnectorPath $fileName

    $method = "GET"
    if (Test-CecObjectProperty -InputObject $request -Name "method") {
        $method = $request.method
    }
    $url = $request.url
    $fileContent = "${method} ${url}`n"

    if (Test-CecObjectProperty -InputObject $request -Name "headers") {
        $headers = $request.headers
        foreach ($key in $headers.PSObject.Properties.Name) {
            # A header may hold several values; each gets its own line.
            foreach ($value in $headers.$key) {
                $fileContent += "${key}: ${value}`n"
            }
        }
    }

    if ((Test-CecObjectProperty -InputObject $request -Name "body") -and $null -ne $request.body) {
        # NOTE(review): "\r\n" and "\n" are literal backslash sequences here,
        # not CR/LF characters. Confirm that is intended.
        $body = $request.body.Replace("\r\n", "`n").Replace("\n", "`n")
        $fileContent += "`n${body}"
    }

    Set-Content -Path $filePath -Value $fileContent
    $Trigger.request = "<exported to ${fileName}>"
}

function Read-Trigger {
    <#
    .SYNOPSIS
    Loads a trigger request back from "<BaseKey>_<type>.http".

    .DESCRIPTION
    When the file exists, parses it and sets Trigger.request to a hashtable with
    method, url, headers (key -> array of values, in file order) and body.
    Lines starting with "#" before the body are ignored; the first blank line
    starts the body. A request line without a space is taken as a URL with
    method GET. A header line without ":" is skipped with a warning. When the
    file is absent, only normalises child "urls" properties to arrays.

    .PARAMETER ConnectorPath
    Folder the file is read from.

    .PARAMETER BaseKey
    File name prefix.

    .PARAMETER Trigger
    Trigger object. Modified in place.
    #>
    param(
        $ConnectorPath,
        $BaseKey,
        $Trigger
    )

    $type = $Trigger.type
    $filePath = Join-Path $ConnectorPath "${BaseKey}_${type}.http"
    if (-not (Test-Path $filePath)) {
        ConvertTo-CecTriggerUrlArray -Trigger $Trigger
        return
    }

    $body = ""
    $method = $null
    $url = $null
    $headers = @{}
    $isBody = $false

    foreach ($line in (Get-Content $filePath)) {
        if ($isBody) {
            $body += $line + "`n"
            continue
        }
        if ($line.StartsWith("#")) {
            continue
        }
        if ($line.Trim() -eq "") {
            $isBody = $true
            continue
        }

        if ($null -eq $method) {
            $index = $line.IndexOf(" ")
            if ($index -lt 0) {
                # Bare URL: same default method as Write-Trigger.
                $method = "GET"
                $url = $line.Trim()
            }
            else {
                $method = $line.Substring(0, $index).Trim()
                $url = $line.Substring($index + 1).Trim()
            }
            continue
        }

        $index = $line.IndexOf(":")
        if ($index -lt 0) {
            Write-Warning "Ignoring malformed header line in ${filePath}: ${line}"
            continue
        }
        $key = $line.Substring(0, $index)
        $value = $line.Substring($index + 1).Trim()
        if (-not $headers.ContainsKey($key)) {
            $headers[$key] = @()
        }
        # Append so that repeated headers keep the order they have in the file.
        $headers[$key] += $value
    }

    $Trigger.request = @{
        method  = $method
        url     = $url
        headers = $headers
        body    = $body.Trim()
    }
}